From 0499bead73d77a5d63cde8e2c516552f750e6193 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 15:09:23 +0200 Subject: wifi: mac80211: tx: clarify conditions in if statement This really just reformats the statement, but makes it more readable. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7fe7280e8437..3a5b41c2ee3d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2855,9 +2855,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } - if (unlikely(!multicast && ((skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || - ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) + if (unlikely(!multicast && + ((skb->sk && + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || + ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) info_id = ieee80211_store_ack_skb(local, skb, &info_flags, cookie); -- cgit v1.2.3 From f498f6ab7adb461a68e13ea6d4443cb3636f2d93 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 15:09:24 +0200 Subject: wifi: mac80211: rework ack_frame_id handling a bit Take one more free bit to indicate it's IDR vs. internal usage, to be able to carve out some bits here for other internal usage, other than IDR handling with a full ACK SKB, that is. Reviewed-by: Benjamin Berg Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 ++++++--- net/mac80211/cfg.c | 3 ++- net/mac80211/ieee80211_i.h | 5 +++++ net/mac80211/status.c | 4 ++-- net/mac80211/tx.c | 14 ++++++++++---- 5 files changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c707358d15c..7f3b6f00f8a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1115,7 +1115,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * not valid if the interface is an MLD since we won't know which * link the frame will be transmitted on * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC - * @ack_frame_id: internal frame ID for TX status, used internally + * @status_data: internal data for TX status handling, assigned privately, + * see also &enum ieee80211_status_data for the internal documentation + * @status_data_idr: indicates status data is IDR allocated ID for ack frame * @tx_time_est: TX time estimate in units of 4us, used internally * @control: union part for control data * @control.rates: TX rates array to try @@ -1155,10 +1157,11 @@ struct ieee80211_tx_info { /* common information */ u32 flags; u32 band:3, - ack_frame_id:13, + status_data_idr:1, + status_data:13, hw_queue:4, tx_time_est:10; - /* 2 free bits */ + /* 1 free bit */ union { struct { diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 45e7a5d9c7d9..29a6da5ee77f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4034,7 +4034,8 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, return -ENOMEM; } - IEEE80211_SKB_CB(skb)->ack_frame_id = id; + IEEE80211_SKB_CB(skb)->status_data_idr = 1; + IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 06bd406846d2..7b74cf96ee0a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -85,6 +85,11 @@ extern const u8 
ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; #define IEEE80211_MAX_NAN_INSTANCE_ID 255 +enum ieee80211_status_data { + IEEE80211_STATUS_TYPE_MASK = 0x00f, + IEEE80211_STATUS_TYPE_INVALID = 0, + IEEE80211_STATUS_SUBDATA_MASK = 0xff0, +}; /* * Keep a station's queues on the active list for deficit accounting purposes diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 44d83da60aee..f24aceb59db0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -633,7 +633,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, unsigned long flags; spin_lock_irqsave(&local->ack_status_lock, flags); - skb = idr_remove(&local->ack_status_frames, info->ack_frame_id); + skb = idr_remove(&local->ack_status_frames, info->status_data); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (!skb) @@ -759,7 +759,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } rcu_read_unlock(); - } else if (info->ack_frame_id) { + } else if (info->status_data_idr) { ieee80211_report_ack_skb(local, skb, acked, dropped, ack_hwtstamp); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3a5b41c2ee3d..ae33f727c6a8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2942,7 +2942,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memset(info, 0, sizeof(*info)); info->flags = info_flags; - info->ack_frame_id = info_id; + if (info_id) { + info->status_data = info_id; + info->status_data_idr = 1; + } info->band = band; if (likely(!cookie)) { @@ -4639,9 +4642,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, } if (unlikely(skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - info->ack_frame_id = ieee80211_store_ack_skb(local, skb, - &info->flags, NULL); + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) { + info->status_data = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); + if (info->status_data) + info->status_data_idr = 1; + } dev_sw_netstats_tx_add(dev, skbs, len); sta->deflink.tx_stats.packets[queue] += skbs; -- cgit v1.2.3 From e665ab9c5aab79e1c576a220013419ef215c3b6a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 28 Aug 2023 15:09:25 +0200 Subject: wifi: mac80211: Fix SMPS handling in the context of MLO When the connection is a MLO connection, a SMPS request should be sent on a specific link, as SMPS is BSS specific, and the DA and BSSID used for the action frame transmission should be the AP MLD address, as the underlying driver is expected to perform the address translation (based on the link ID). Fix the SMPS request handling to use the AP MLD address and provide the link ID for the request processing during Tx. 
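A minimal userspace sketch of the addressing rule described above (illustrative only, not the mac80211 code; the structure and names are made up for the example): for an MLO connection the frame is addressed to the AP MLD address and the link ID is carried along for TX, otherwise the plain BSSID is used and -1 stands for "no specific link".

	/* illustrative sketch, not kernel code */
	#include <stdio.h>
	#include <stdbool.h>

	struct smps_tx_params {
		const unsigned char *da;	/* DA/BSSID for the action frame */
		int link_id;			/* link to transmit on, -1 if not MLO */
	};

	static struct smps_tx_params smps_tx_target(bool is_mld,
						    const unsigned char *ap_mld_addr,
						    const unsigned char *link_bssid,
						    int link_id)
	{
		struct smps_tx_params p;

		/* for non-MLO the AP address and the BSSID are the same thing */
		p.da = is_mld ? ap_mld_addr : link_bssid;
		p.link_id = is_mld ? link_id : -1;
		return p;
	}

	int main(void)
	{
		unsigned char mld[6] = { 0x02, 0, 0, 0, 0, 1 };
		unsigned char bssid[6] = { 0x02, 0, 0, 0, 0, 2 };
		struct smps_tx_params p = smps_tx_target(true, mld, bssid, 1);

		printf("link_id=%d da starts with %02x\n", p.link_id, (unsigned)p.da[0]);
		return 0;
	}
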
Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 ++++++++-- net/mac80211/ht.c | 4 ++-- net/mac80211/ieee80211_i.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 29a6da5ee77f..fa20a260f9c8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3178,6 +3178,10 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; + if (ieee80211_vif_is_mld(&sdata->vif) && + !(sdata->vif.active_links & BIT(link->link_id))) + return 0; + old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; @@ -3194,7 +3198,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; - ap = link->u.mgd.bssid; + ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { @@ -3216,7 +3220,9 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, - ap, ap); + ap, ap, + ieee80211_vif_is_mld(&sdata->vif) ? + link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 33729870ad8a..802b0e738696 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -538,7 +538,7 @@ ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps) int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, - const u8 *bssid) + const u8 *bssid, int link_id) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -579,7 +579,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, /* we'll do more on status of this frame */ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7b74cf96ee0a..1df2101d8eeb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2098,7 +2098,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, u16 initiator, u16 reason_code); int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, - const u8 *bssid); + const u8 *bssid, int link_id); bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old, enum ieee80211_smps_mode smps_mode_new); -- cgit v1.2.3 From e3640a82e573f008e5c2058a7971873fa1d438c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 15:09:26 +0200 Subject: wifi: mac80211: fix SMPS status handling The current SMPS status handling isn't per link, so we only ever change the deflink, which is obviously wrong, it's not even used for multi-link connections, but the request API actually includes the link ID. Use the new status_data changes to move the handling to the right link, this also saves parsing the frame again on the status report, instead we can now check only if it was an SMPS frame. Of course, move the worker to be a wiphy work so that we're able to cancel it safely for the link. 
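To make the status_data bit layout used by this change concrete, the following self-contained C sketch shows the encode/decode round trip. The two helpers are simplified stand-ins for the kernel's u16_encode_bits()/u16_get_bits() from linux/bitfield.h, hard-coded for the 0xff0 mask; the example values are arbitrary.

	/* standalone sketch of the 13-bit status_data layout */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define IEEE80211_STATUS_TYPE_MASK	0x00f	/* low 4 bits: status type */
	#define IEEE80211_STATUS_TYPE_SMPS	1
	#define IEEE80211_STATUS_SUBDATA_MASK	0xff0	/* next 8 bits: type-specific data */

	/* simplified stand-ins for u16_encode_bits()/u16_get_bits(), shift of 4 */
	static uint16_t encode_subdata(uint16_t val)
	{
		return (uint16_t)((val << 4) & IEEE80211_STATUS_SUBDATA_MASK);
	}

	static uint16_t get_subdata(uint16_t reg)
	{
		return (uint16_t)((reg & IEEE80211_STATUS_SUBDATA_MASK) >> 4);
	}

	int main(void)
	{
		unsigned int link_id = 5, smps = 2;	/* 4-bit link ID, 2-bit SMPS mode value */

		/* TX side: type in the low bits, (link_id << 2 | smps) as subdata */
		uint16_t status_data = IEEE80211_STATUS_TYPE_SMPS |
				       encode_subdata((uint16_t)(link_id << 2 | smps));

		/* status side: recover type, SMPS mode and link ID */
		assert((status_data & IEEE80211_STATUS_TYPE_MASK) == IEEE80211_STATUS_TYPE_SMPS);
		uint16_t sub = get_subdata(status_data);

		printf("smps=%u link_id=%u (status_data=0x%03x)\n",
		       sub & 3, sub >> 2, status_data);
		return 0;
	}
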
Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 10 +++++- net/mac80211/ieee80211_i.h | 7 ++-- net/mac80211/iface.c | 11 ------ net/mac80211/mlme.c | 14 ++++++++ net/mac80211/status.c | 86 +++++++++++++++++++++++++++------------------- 5 files changed, 78 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 802b0e738696..05f98f0a91a8 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -543,6 +543,8 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *action_frame; + struct ieee80211_tx_info *info; + u8 status_link_id = link_id < 0 ? 0 : link_id; /* 27 = header + category + action + smps mode */ skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); @@ -562,6 +564,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); + smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = @@ -578,7 +581,12 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, } /* we'll do more on status of this frame */ - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + info = IEEE80211_SKB_CB(skb); + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* we have 12 bits, and need 6: link_id 4, smps 2 */ + info->status_data = IEEE80211_STATUS_TYPE_SMPS | + u16_encode_bits(status_link_id << 2 | smps, + IEEE80211_STATUS_SUBDATA_MASK); ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1df2101d8eeb..338ab9e6e6b1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -88,6 +88,7 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; enum ieee80211_status_data { IEEE80211_STATUS_TYPE_MASK = 0x00f, IEEE80211_STATUS_TYPE_INVALID = 0, + IEEE80211_STATUS_TYPE_SMPS = 1, IEEE80211_STATUS_SUBDATA_MASK = 0xff0, }; @@ -931,6 +932,9 @@ struct ieee80211_link_data_managed { struct wiphy_delayed_work chswitch_work; struct wiphy_work request_smps_work; + /* used to reconfigure hardware SM PS */ + struct wiphy_work recalc_smps; + bool beacon_crc_valid; u32 beacon_crc; struct ewma_beacon_signal ave_beacon_signal; @@ -1069,9 +1073,6 @@ struct ieee80211_sub_if_data { atomic_t num_tx_queued; struct mac80211_qos_map __rcu *qos_map; - /* used to reconfigure hardware SM PS */ - struct work_struct recalc_smps; - struct wiphy_work work; struct sk_buff_head skb_queue; struct sk_buff_head status_queue; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index be586bc0b5b7..4beab027e0f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -518,8 +518,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); - cancel_work_sync(&sdata->recalc_smps); - sdata_lock(sdata); WARN(ieee80211_vif_is_mld(&sdata->vif), "destroying interface with valid links 0x%04x\n", @@ -1692,14 +1690,6 @@ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work) } } -static void ieee80211_recalc_smps_work(struct work_struct *work) -{ - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, recalc_smps); - - ieee80211_recalc_smps(sdata, &sdata->deflink); -} - static void ieee80211_activate_links_work(struct work_struct *work) { struct 
ieee80211_sub_if_data *sdata = @@ -1745,7 +1735,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); wiphy_work_init(&sdata->work, ieee80211_iface_work); - INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); switch (type) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f93eb38ae0b8..65d3e167132c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6847,6 +6847,16 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->orig_teardown_skb = NULL; } +static void ieee80211_recalc_smps_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct ieee80211_link_data *link = + container_of(work, struct ieee80211_link_data, + u.mgd.recalc_smps); + + ieee80211_recalc_smps(link->sdata, link); +} + void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; @@ -6859,6 +6869,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) wiphy_work_init(&link->u.mgd.request_smps_work, ieee80211_request_smps_mgd_work); + wiphy_work_init(&link->u.mgd.recalc_smps, + ieee80211_recalc_smps_work); if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS) link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -7824,6 +7836,8 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) { wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.request_smps_work); + wiphy_work_cancel(link->sdata->local->hw.wiphy, + &link->u.mgd.recalc_smps); wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.chswitch_work); } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index f24aceb59db0..3355e66d96d8 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -184,8 +184,6 @@ static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *) skb->data; - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -194,39 +192,6 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) ieee80211_check_pending_bar(sta, hdr->addr1, tid); } - - if (ieee80211_is_action(mgmt->frame_control) && - !ieee80211_has_protected(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_HT && - mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS && - ieee80211_sdata_running(sdata)) { - enum ieee80211_smps_mode smps_mode; - - switch (mgmt->u.action.u.ht_smps.smps_control) { - case WLAN_HT_SMPS_CONTROL_DYNAMIC: - smps_mode = IEEE80211_SMPS_DYNAMIC; - break; - case WLAN_HT_SMPS_CONTROL_STATIC: - smps_mode = IEEE80211_SMPS_STATIC; - break; - case WLAN_HT_SMPS_CONTROL_DISABLED: - default: /* shouldn't happen since we don't send that */ - smps_mode = IEEE80211_SMPS_OFF; - break; - } - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - /* - * This update looks racy, but isn't -- if we come - * here we've definitely got a station that we're - * talking to, and on a managed interface that can - * only be the AP. And the only other place updating - * this variable in managed mode is before association. 
- */ - sdata->deflink.smps_mode = smps_mode; - ieee80211_queue_work(&local->hw, &sdata->recalc_smps); - } - } } static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) @@ -695,6 +660,42 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } } +static void ieee80211_handle_smps_status(struct ieee80211_sub_if_data *sdata, + bool acked, u16 status_data) +{ + u16 sub_data = u16_get_bits(status_data, IEEE80211_STATUS_SUBDATA_MASK); + enum ieee80211_smps_mode smps_mode = sub_data & 3; + int link_id = (sub_data >> 2); + struct ieee80211_link_data *link; + + if (!sdata || !ieee80211_sdata_running(sdata)) + return; + + if (!acked) + return; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + if (WARN(link_id >= ARRAY_SIZE(sdata->link), + "bad SMPS status link: %d\n", link_id)) + return; + + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return; + + /* + * This update looks racy, but isn't, the only other place + * updating this variable is in managed mode before assoc, + * and we have to be associated to have a status from the + * action frame TX, since we cannot send it while we're not + * associated yet. + */ + link->smps_mode = smps_mode; + wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); +} + static void ieee80211_report_used_skb(struct ieee80211_local *local, struct sk_buff *skb, bool dropped, ktime_t ack_hwtstamp) @@ -762,6 +763,21 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } else if (info->status_data_idr) { ieee80211_report_ack_skb(local, skb, acked, dropped, ack_hwtstamp); + } else if (info->status_data) { + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + + sdata = ieee80211_sdata_from_skb(local, skb); + + switch (u16_get_bits(info->status_data, + IEEE80211_STATUS_TYPE_MASK)) { + case IEEE80211_STATUS_TYPE_SMPS: + ieee80211_handle_smps_status(sdata, acked, + info->status_data); + break; + } + rcu_read_unlock(); } if (!dropped && skb->destructor) { -- cgit v1.2.3 From 629ebb8532e96c3921fe8828c650ce03b697cfdd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:29 +0200 Subject: wifi: mac80211: debugfs: lock wiphy instead of RTNL Since we no longer really use the RTNL, there's no point in locking it here. Most drivers don't really need to have any locks here anyway, and the rest are probably completely broken, but it's a debugfs-only callback so it really doesn't matter much. 
Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 207f772bd8ce..2efc6ccbfcf6 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019, 2021-2022 Intel Corporation + * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation */ #include @@ -594,9 +594,9 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local, char buf[20]; int res; - rtnl_lock(); + wiphy_lock(local->hw.wiphy); res = drv_get_stats(local, &stats); - rtnl_unlock(); + wiphy_unlock(local->hw.wiphy); if (res) return res; res = printvalue(&stats, buf, sizeof(buf)); -- cgit v1.2.3 From 0ab6cba0696da4c6dff6e047ef8aa6c9c0fadf13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:30 +0200 Subject: wifi: mac80211: hold wiphy lock in netdev/link debugfs It's no longer really needed to ensure that the debugfs file isn't going away, debugfs handles that. So there's no point in holding dev_base_lock or RTNL here, but we should instead hold the wiphy lock since drivers will be allowed to depend on that. Do that, which requires splitting the sdata and link macros a bit. Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 123 ++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 63250286dc8b..706330fadc97 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -22,18 +22,18 @@ #include "debugfs_netdev.h" #include "driver-ops.h" -static ssize_t ieee80211_if_read( - void *data, +static ssize_t ieee80211_if_read_sdata( + struct ieee80211_sub_if_data *sdata, char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*format)(const void *, char *, int)) + ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int)) { char buf[200]; ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); - ret = (*format)(data, buf, sizeof(buf)); - read_unlock(&dev_base_lock); + wiphy_lock(sdata->local->hw.wiphy); + ret = (*format)(sdata, buf, sizeof(buf)); + wiphy_unlock(sdata->local->hw.wiphy); if (ret >= 0) ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); @@ -41,11 +41,11 @@ static ssize_t ieee80211_if_read( return ret; } -static ssize_t ieee80211_if_write( - void *data, +static ssize_t ieee80211_if_write_sdata( + struct ieee80211_sub_if_data *sdata, const char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*write)(void *, const char *, int)) + ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int)) { char buf[64]; ssize_t ret; @@ -57,9 +57,51 @@ static ssize_t ieee80211_if_write( return -EFAULT; buf[count] = '\0'; - rtnl_lock(); - ret = (*write)(data, buf, count); - rtnl_unlock(); + wiphy_lock(sdata->local->hw.wiphy); + ret = (*write)(sdata, buf, count); + wiphy_unlock(sdata->local->hw.wiphy); + + return ret; +} + +static ssize_t ieee80211_if_read_link( + struct ieee80211_link_data *link, + char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*format)(const struct ieee80211_link_data *link, char *, int)) +{ + char buf[200]; + ssize_t ret = -EINVAL; + + wiphy_lock(link->sdata->local->hw.wiphy); + ret = (*format)(link, buf, sizeof(buf)); + wiphy_unlock(link->sdata->local->hw.wiphy); + + if (ret >= 0) + 
ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); + + return ret; +} + +static ssize_t ieee80211_if_write_link( + struct ieee80211_link_data *link, + const char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*write)(struct ieee80211_link_data *link, const char *, int)) +{ + char buf[64]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -E2BIG; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + buf[count] = '\0'; + + wiphy_lock(link->sdata->local->hw.wiphy); + ret = (*write)(link, buf, count); + wiphy_unlock(link->sdata->local->hw.wiphy); return ret; } @@ -126,41 +168,37 @@ static const struct file_operations name##_ops = { \ .llseek = generic_file_llseek, \ } -#define _IEEE80211_IF_FILE_R_FN(name, type) \ +#define _IEEE80211_IF_FILE_R_FN(name) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - ssize_t (*fn)(const void *, char *, int) = (void *) \ - ((ssize_t (*)(const type, char *, int)) \ - ieee80211_if_fmt_##name); \ - return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, fn); \ + return ieee80211_if_read_sdata(file->private_data, \ + userbuf, count, ppos, \ + ieee80211_if_fmt_##name); \ } -#define _IEEE80211_IF_FILE_W_FN(name, type) \ +#define _IEEE80211_IF_FILE_W_FN(name) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - ssize_t (*fn)(void *, const char *, int) = (void *) \ - ((ssize_t (*)(type, const char *, int)) \ - ieee80211_if_parse_##name); \ - return ieee80211_if_write(file->private_data, userbuf, count, \ - ppos, fn); \ + return ieee80211_if_write_sdata(file->private_data, userbuf, \ + count, ppos, \ + ieee80211_if_parse_##name); \ } #define IEEE80211_IF_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_R_FN(name) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_W_FN(name) \ _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) @@ -168,18 +206,37 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \ IEEE80211_IF_FILE_R(name) -/* Same but with a link_ prefix in the ops variable name and different type */ +#define _IEEE80211_IF_LINK_R_FN(name) \ +static ssize_t ieee80211_if_read_##name(struct file *file, \ + char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_read_link(file->private_data, \ + userbuf, count, ppos, \ + ieee80211_if_fmt_##name); \ +} + +#define _IEEE80211_IF_LINK_W_FN(name) \ +static ssize_t ieee80211_if_write_##name(struct file *file, \ + const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_write_link(file->private_data, userbuf, \ + count, ppos, \ + ieee80211_if_parse_##name); \ +} + #define IEEE80211_IF_LINK_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_LINK_R_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL) #define 
IEEE80211_IF_LINK_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_LINK_W_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_LINK_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_LINK_R_FN(name) \ + _IEEE80211_IF_LINK_W_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) -- cgit v1.2.3 From e911a8192e29d7326e9062947ecd753c8572eb09 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:31 +0200 Subject: wifi: mac80211: lock wiphy for aggregation debugfs To change aggregation status may call into the driver, lock the wiphy for this. Signed-off-by: Johannes Berg --- net/mac80211/debugfs_sta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 5a97fb248c85..06e3613bf46b 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #include @@ -420,6 +420,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (ret || tid >= IEEE80211_NUM_TIDS) return -EINVAL; + wiphy_lock(sta->local->hw.wiphy); if (tx) { if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid, @@ -431,6 +432,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu 3, true); ret = 0; } + wiphy_unlock(sta->local->hw.wiphy); return ret ?: count; } -- cgit v1.2.3 From 7483a2147aebea537e1447489109eeb5e771aff5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:32 +0200 Subject: wifi: cfg80211: check RTNL when iterating devices Add a new "for_each_rdev()" macro and check that we hold the RTNL when calling it. Signed-off-by: Johannes Berg --- net/wireless/chan.c | 2 +- net/wireless/core.c | 6 +++--- net/wireless/core.h | 10 ++++++++++ net/wireless/nl80211.c | 6 +++--- net/wireless/reg.c | 18 +++++++++--------- net/wireless/sme.c | 2 +- 6 files changed, 27 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0b7e81db383d..3c691ae904d1 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -823,7 +823,7 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (!(chan->flags & IEEE80211_CHAN_RADAR)) return false; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; diff --git a/net/wireless/core.c b/net/wireless/core.c index 25bc2e50a061..fdb9d736a2e8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -60,7 +60,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (rdev->wiphy_idx == wiphy_idx) { result = rdev; break; @@ -116,7 +116,7 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev, } /* Ensure another device does not already have this name. 
*/ - list_for_each_entry(rdev2, &cfg80211_rdev_list, list) + for_each_rdev(rdev2) if (strcmp(newname, wiphy_name(&rdev2->wiphy)) == 0) return -EINVAL; @@ -1601,7 +1601,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 507d184b8b40..d4976550e7f6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -160,6 +160,16 @@ extern struct workqueue_struct *cfg80211_wq; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; +/* This is constructed like this so it can be used in if/else */ +static inline int for_each_rdev_check_rtnl(void) +{ + ASSERT_RTNL(); + return 0; +} +#define for_each_rdev(rdev) \ + if (for_each_rdev_check_rtnl()) {} else \ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) + struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index de47838aca4f..f4298104a2f4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -106,7 +106,7 @@ __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev, ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { struct wireless_dev *wdev; if (wiphy_net(&rdev->wiphy) != netns) @@ -3075,7 +3075,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = (long)state; } - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) @@ -3985,7 +3985,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * filter_wiphy = cb->args[2] - 1; } - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0317cf9da307..f861d1d82b18 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2461,7 +2461,7 @@ static void reg_check_chans_work(struct work_struct *work) pr_debug("Verifying active interfaces after reg change\n"); rtnl_lock(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) + for_each_rdev(rdev) reg_leave_invalid_chans(&rdev->wiphy); rtnl_unlock(); @@ -2515,7 +2515,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); } @@ -2991,7 +2991,7 @@ static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (wiphy == &rdev->wiphy) continue; wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy); @@ -3057,7 +3057,7 @@ static void notify_self_managed_wiphys(struct regulatory_request *request) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy = &rdev->wiphy; if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && request->initiator == NL80211_REGDOM_SET_BY_USER) @@ -3122,7 +3122,7 @@ static void 
reg_process_pending_beacon_hints(void) list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) + for_each_rdev(rdev) wiphy_update_new_beacon(&rdev->wiphy, pending_beacon); /* Remembers the beacon hint for new wiphys or reg changes */ @@ -3177,7 +3177,7 @@ static void reg_process_self_managed_hints(void) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy_lock(&rdev->wiphy); reg_process_self_managed_hint(&rdev->wiphy); wiphy_unlock(&rdev->wiphy); @@ -3517,7 +3517,7 @@ static void restore_regulatory_settings(bool reset_user, bool cached) world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) @@ -3574,7 +3574,7 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); if (!(wdev->wiphy->regulatory_flags & flag)) { @@ -4244,7 +4244,7 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, if (WARN_ON(!cfg80211_chandef_valid(chandef))) return; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (wiphy == &rdev->wiphy) continue; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 9bba233b5a6e..53ba46f85ceb 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -702,7 +702,7 @@ static bool cfg80211_is_all_idle(void) * need not issue a disconnect hint and reset any info such * as chan dfs state, etc. */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); if (wdev->conn || wdev->connected || -- cgit v1.2.3 From 56cfb8ce1f7f6c4e5ca571a2ec0880e131cd0311 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:33 +0200 Subject: wifi: cfg80211: add flush functions for wiphy work There may be sometimes reasons to actually run the work if it's pending, add flush functions for both regular and delayed wiphy work that will do this. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++++++++++ net/wireless/core.c | 34 ++++++++++++++++++++++++++++++++-- net/wireless/core.h | 3 ++- net/wireless/sysfs.c | 4 ++-- 4 files changed, 57 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3a4b684f89bf..d1964a6d0b35 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5826,6 +5826,16 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); */ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); +/** + * wiphy_work_flush - flush previously queued work + * @wiphy: the wiphy, for debug purposes + * @work: the work to flush, this can be %NULL to flush all work + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). 
+ */ +void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work); + struct wiphy_delayed_work { struct wiphy_work work; struct wiphy *wiphy; @@ -5869,6 +5879,17 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, void wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +/** + * wiphy_delayed work_flush - flush previously queued delayed work + * @wiphy: the wiphy, for debug purposes + * @work: the work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork); + /** * struct wireless_dev - wireless device state * diff --git a/net/wireless/core.c b/net/wireless/core.c index fdb9d736a2e8..88042a647aaa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1049,7 +1049,8 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_rfkill_start_polling); -void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev) +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, + struct wiphy_work *end) { unsigned int runaway_limit = 100; unsigned long flags; @@ -1068,6 +1069,10 @@ void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev) wk->func(&rdev->wiphy, wk); spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + + if (wk == end) + break; + if (WARN_ON(--runaway_limit == 0)) INIT_LIST_HEAD(&rdev->wiphy_work_list); } @@ -1118,7 +1123,7 @@ void wiphy_unregister(struct wiphy *wiphy) #endif /* surely nothing is reachable now, clean up work */ - cfg80211_process_wiphy_works(rdev); + cfg80211_process_wiphy_works(rdev, NULL); wiphy_unlock(&rdev->wiphy); rtnl_unlock(); @@ -1640,6 +1645,21 @@ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work) } EXPORT_SYMBOL_GPL(wiphy_work_cancel); +void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + unsigned long flags; + bool run; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + run = !work || !list_empty(&work->entry); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + + if (run) + cfg80211_process_wiphy_works(rdev, work); +} +EXPORT_SYMBOL_GPL(wiphy_work_flush); + void wiphy_delayed_work_timer(struct timer_list *t) { struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer); @@ -1672,6 +1692,16 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel); +void wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork) +{ + lockdep_assert_held(&wiphy->mtx); + + del_timer_sync(&dwork->timer); + wiphy_work_flush(wiphy, &dwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_delayed_work_flush); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/core.h b/net/wireless/core.h index d4976550e7f6..b9876b444e1b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -469,7 +469,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); -void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev); +void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, + struct wiphy_work *end); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct 
ieee80211_freq_range *freq_range, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index c629bac3f298..565511a3f461 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -105,14 +105,14 @@ static int wiphy_suspend(struct device *dev) cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); } - cfg80211_process_wiphy_works(rdev); + cfg80211_process_wiphy_works(rdev, NULL); if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); cfg80211_process_rdev_events(rdev); - cfg80211_process_wiphy_works(rdev); + cfg80211_process_wiphy_works(rdev, NULL); ret = rdev_suspend(rdev, NULL); } if (ret == 0) -- cgit v1.2.3 From b920590f9a7f7359d37d260726b32dbb21b833be Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:34 +0200 Subject: wifi: mac80211: flush wiphy work where appropriate Before converting more works to wiphy work, add flushing in mac80211 where we also flush the mac80211 workqueue. Not needed in suspend since cfg80211 will have taken care of it. Signed-off-by: Johannes Berg --- net/mac80211/main.c | 1 + net/mac80211/util.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 24315d7b3126..aeb21cfe789a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -343,6 +343,7 @@ static void ieee80211_restart_work(struct work_struct *work) rtnl_lock(); /* we might do interface manipulations, so need both */ wiphy_lock(local->hw.wiphy); + wiphy_work_flush(local->hw.wiphy, NULL); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8a6917cf63cf..6d8b73796dc5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2319,6 +2319,7 @@ void ieee80211_stop_device(struct ieee80211_local *local) cancel_work_sync(&local->reconfig_filter); flush_workqueue(local->workqueue); + wiphy_work_flush(local->hw.wiphy, NULL); drv_stop(local); } -- cgit v1.2.3 From 1b6721189570b7955ff745934fb0e428313e6e51 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:35 +0200 Subject: wifi: mac80211: convert A-MPDU work to wiphy work Convert the A-MPDU work to wiphy work so it holds the wiphy mutex and we can later guarantee that to drivers. It might seem that we could run these concurrently for different stations, but they're all on the ordered mac80211 workqueue, so this shouldn't matter for that. 
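The serialization argument above can be illustrated with a toy, single-threaded model of the wiphy work scheme (this is not the cfg80211 implementation): everything queued ends up on one ordered list and the works run back to back with the wiphy mutex held, so works for different stations never run concurrently and each callback can assume the lock.

	/* toy model, not cfg80211 code */
	#include <pthread.h>
	#include <stdio.h>

	struct wiphy_work_model {
		struct wiphy_work_model *next;
		void (*func)(struct wiphy_work_model *work);
	};

	static pthread_mutex_t wiphy_mtx = PTHREAD_MUTEX_INITIALIZER;
	static struct wiphy_work_model *work_list;

	static void work_queue(struct wiphy_work_model *work)
	{
		/* append in submission order */
		struct wiphy_work_model **p = &work_list;

		while (*p)
			p = &(*p)->next;
		work->next = NULL;
		*p = work;
	}

	static void process_works(void)
	{
		pthread_mutex_lock(&wiphy_mtx);	/* every callback runs under this lock */
		while (work_list) {
			struct wiphy_work_model *wk = work_list;

			work_list = wk->next;
			wk->func(wk);
		}
		pthread_mutex_unlock(&wiphy_mtx);
	}

	static void ba_session_work(struct wiphy_work_model *work)
	{
		printf("BA session work %p runs with the wiphy mutex held\n", (void *)work);
	}

	int main(void)
	{
		struct wiphy_work_model sta1 = { .func = ba_session_work };
		struct wiphy_work_model sta2 = { .func = ba_session_work };

		work_queue(&sta1);	/* e.g. queued for station 1 */
		work_queue(&sta2);	/* ... and for station 2 */
		process_works();	/* both run in order, never concurrently */
		return 0;
	}
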
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 12 +++++------- net/mac80211/agg-tx.c | 8 ++++---- net/mac80211/ht.c | 6 +++--- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/sta_info.c | 2 +- net/mac80211/sta_info.h | 2 +- 6 files changed, 15 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index c6fa53230450..a686f1ce66cb 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ /** @@ -140,7 +140,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, if (ba_rx_bitmap & BIT(i)) set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested); - ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); @@ -166,7 +166,7 @@ static void sta_rx_agg_session_timer_expired(struct timer_list *t) sta->sta.addr, tid); set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); - ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); } static void sta_rx_agg_reorder_timer_expired(struct timer_list *t) @@ -507,7 +507,6 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -516,7 +515,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); } @@ -526,7 +525,6 @@ void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -535,7 +533,7 @@ void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b6b772685881..3da0c55f13e2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -743,7 +743,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, */ sta->ampdu_mlme.tid_start_tx[tid] = tid_tx; - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); /* this flow continues off the work */ err_unlock_sta: @@ -862,7 +862,7 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, goto out; set_bit(HT_AGG_STATE_START_CB, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } @@ -916,7 +916,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) } set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + 
wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); unlock: spin_unlock_bh(&sta->lock); @@ -976,7 +976,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, goto out; set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 05f98f0a91a8..e8feed05528a 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -333,7 +333,7 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, * the BA session, so handle it to properly clean tid_tx data. */ if(reason == AGG_STOP_DESTROY_STA) { - cancel_work_sync(&sta->ampdu_mlme.work); + wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work); mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { @@ -350,7 +350,7 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, } } -void ieee80211_ba_session_work(struct work_struct *work) +void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) { struct sta_info *sta = container_of(work, struct sta_info, ampdu_mlme.work); @@ -416,7 +416,7 @@ void ieee80211_ba_session_work(struct work_struct *work) mutex_unlock(&sta->ampdu_mlme.mtx); - ieee80211_queue_work(&sdata->local->hw, work); + wiphy_work_queue(sdata->local->hw.wiphy, work); return; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 338ab9e6e6b1..7604e43a441c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2134,7 +2134,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); -void ieee80211_ba_session_work(struct work_struct *work); +void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7751f8ba960e..b68bf77b05d0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -556,7 +556,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); - INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); + wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 195b563132d6..1deab7e33a7c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -291,7 +291,7 @@ struct sta_ampdu_mlme { unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ - struct work_struct work; + struct wiphy_work work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; struct tid_ampdu_tx *tid_start_tx[IEEE80211_NUM_TIDS]; unsigned long last_addba_req_time[IEEE80211_NUM_TIDS]; -- cgit v1.2.3 From 5549b0885d6fbad79e0e471a9a863bd2f45af0c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:36 +0200 Subject: wifi: mac80211: add more ops assertions Add more might_sleep() checks and check sdata-in-driver for one additional place. 
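As an illustration of the wrapper pattern these assertions extend (a userspace sketch, not kernel code), each drv_*() wrapper first asserts that it is called from a context that may sleep and then checks that the interface has already been added to the driver before invoking the driver callback. The "atomic context" flag and the error value below are stand-ins for the real might_sleep()/check_sdata_in_driver() and -EIO.

	/* illustrative sketch only */
	#include <assert.h>
	#include <stdbool.h>
	#include <stdio.h>

	static bool in_atomic_context;			/* stand-in for the scheduler's check */
	#define might_sleep() assert(!in_atomic_context)

	struct sdata_model {
		bool in_driver;				/* set once the interface was added to the driver */
	};

	static bool check_sdata_in_driver(const struct sdata_model *sdata)
	{
		return sdata->in_driver;
	}

	static int drv_example_op(struct sdata_model *sdata)
	{
		might_sleep();				/* the driver callback may block */
		if (!check_sdata_in_driver(sdata))
			return -5;			/* would be -EIO in the kernel */

		printf("calling into the driver\n");	/* local->ops->example_op(...) */
		return 0;
	}

	int main(void)
	{
		struct sdata_model sdata = { .in_driver = true };

		return drv_example_op(&sdata);
	}
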
type=feature ticket=jira:WIFI-314309 Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 2 ++ net/mac80211/driver-ops.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'net') diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 30cd0c905a24..376dae58b5a6 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -285,6 +285,8 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, { int ret = 0; + might_sleep(); + drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) return -EIO; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c4505593ba7a..d95ff2282f54 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -40,6 +40,8 @@ static inline void drv_tx(struct ieee80211_local *local, static inline void drv_sync_rx_queues(struct ieee80211_local *local, struct sta_info *sta) { + might_sleep(); + if (local->ops->sync_rx_queues) { trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta); local->ops->sync_rx_queues(&local->hw); @@ -569,6 +571,8 @@ static inline void drv_sta_statistics(struct ieee80211_local *local, struct ieee80211_sta *sta, struct station_info *sinfo) { + might_sleep(); + sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) return; @@ -616,6 +620,8 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, { int ret = -EOPNOTSUPP; + might_sleep(); + trace_drv_get_survey(local, idx, survey); if (local->ops->get_survey) @@ -797,6 +803,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { + might_sleep(); + if (!check_sdata_in_driver(sdata)) return; @@ -987,6 +995,8 @@ static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf) { + might_sleep(); + /* make sure link_conf is protected */ drv_verify_link_exists(sdata, link_conf); @@ -1016,6 +1026,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx) { + might_sleep(); + if (!check_sdata_in_driver(sdata)) return; @@ -1046,6 +1058,8 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; + might_sleep(); + if (local->ops->channel_switch_beacon) { trace_drv_channel_switch_beacon(local, sdata, chandef); local->ops->channel_switch_beacon(&local->hw, &sdata->vif, @@ -1060,6 +1074,8 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; int ret = 0; + might_sleep(); + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1077,6 +1093,8 @@ drv_post_channel_switch(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; int ret = 0; + might_sleep(); + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1092,6 +1110,8 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; + might_sleep(); + if (!check_sdata_in_driver(sdata)) return; @@ -1107,6 +1127,8 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; + might_sleep(); + if (!check_sdata_in_driver(sdata)) return; @@ -1163,6 +1185,8 @@ static inline int drv_get_txpower(struct ieee80211_local *local, { int ret; + might_sleep(); + if (!local->ops->get_txpower) return -EOPNOTSUPP; @@ -1267,6 +1291,10 @@ 
drv_get_ftm_responder_stats(struct ieee80211_local *local, { u32 ret = -EOPNOTSUPP; + might_sleep(); + if (!check_sdata_in_driver(sdata)) + return -EIO; + if (local->ops->get_ftm_responder_stats) ret = local->ops->get_ftm_responder_stats(&local->hw, &sdata->vif, @@ -1436,6 +1464,8 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local, struct ieee80211_sta *sta, bool enabled) { sdata = get_bss_sdata(sdata); + + might_sleep(); if (!check_sdata_in_driver(sdata)) return; @@ -1451,6 +1481,8 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, bool enabled) { sdata = get_bss_sdata(sdata); + + might_sleep(); if (!check_sdata_in_driver(sdata)) return; @@ -1526,6 +1558,8 @@ static inline int drv_net_setup_tc(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; + might_sleep(); + sdata = get_bss_sdata(sdata); trace_drv_net_setup_tc(local, sdata, type); if (local->ops->net_setup_tc) -- cgit v1.2.3 From 766d2601a6e50b52c5dcc47dce6d64faa2cffb30 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:37 +0200 Subject: wifi: mac80211: move DFS CAC work to wiphy work Move the DFS CAC work over to hold the wiphy lock there without worry about work cancellation. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 +++++----- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/iface.c | 3 ++- net/mac80211/link.c | 4 ++-- net/mac80211/mlme.c | 7 +++---- net/mac80211/util.c | 3 ++- 6 files changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fa20a260f9c8..36e3515bf8d9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1643,7 +1643,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) { chandef = link_conf->chandef; - cancel_delayed_work_sync(&link->dfs_cac_timer_work); + wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -3424,9 +3424,8 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, if (err) goto out_unlock; - ieee80211_queue_delayed_work(&sdata->local->hw, - &sdata->deflink.dfs_cac_timer_work, - msecs_to_jiffies(cac_time_ms)); + wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work, + msecs_to_jiffies(cac_time_ms)); out_unlock: mutex_unlock(&local->mtx); @@ -3445,7 +3444,8 @@ static void ieee80211_end_cac(struct wiphy *wiphy, * by the time it gets it, sdata->wdev.cac_started * will no longer be true */ - cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work); + wiphy_delayed_work_cancel(wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { ieee80211_link_release_channel(&sdata->deflink); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7604e43a441c..0d9aff6a4fc8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1014,7 +1014,7 @@ struct ieee80211_link_data { int ap_power_level; /* in dBm */ bool radar_required; - struct delayed_work dfs_cac_timer_work; + struct wiphy_delayed_work dfs_cac_timer_work; union { struct ieee80211_link_data_managed mgd; @@ -2569,7 +2569,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_link_data *rsvd_for); bool ieee80211_is_radar_required(struct ieee80211_local *local); -void ieee80211_dfs_cac_timer_work(struct work_struct *work); +void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); void 
ieee80211_dfs_cac_cancel(struct ieee80211_local *local); void ieee80211_dfs_radar_detected_work(struct work_struct *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4beab027e0f9..a8c08424c015 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -538,7 +538,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do cancel_work_sync(&sdata->deflink.csa_finalize_work); cancel_work_sync(&sdata->deflink.color_change_finalize_work); - cancel_delayed_work_sync(&sdata->deflink.dfs_cac_timer_work); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 6148208b320e..748d222e8d3d 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -45,8 +45,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_color_collision_detection_work); INIT_LIST_HEAD(&link->assigned_chanctx_list); INIT_LIST_HEAD(&link->reserved_chanctx_list); - INIT_DELAYED_WORK(&link->dfs_cac_timer_work, - ieee80211_dfs_cac_timer_work); + wiphy_delayed_work_init(&link->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); if (!deflink) { switch (sdata->vif.type) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 65d3e167132c..5644e25ec5fe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2401,12 +2401,11 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t) ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } -void ieee80211_dfs_cac_timer_work(struct work_struct *work) +void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) { - struct delayed_work *delayed_work = to_delayed_work(work); struct ieee80211_link_data *link = - container_of(delayed_work, struct ieee80211_link_data, - dfs_cac_timer_work); + container_of(work, struct ieee80211_link_data, + dfs_cac_timer_work.work); struct cfg80211_chan_def chandef = link->conf->chandef; struct ieee80211_sub_if_data *sdata = link->sdata; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6d8b73796dc5..ff99aee46656 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4343,7 +4343,8 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) * by the time it gets it, sdata->wdev.cac_started * will no longer be true */ - cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; -- cgit v1.2.3 From 228e4f931b0e630dacca8dd867ddd863aea53913 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:38 +0200 Subject: wifi: mac80211: move radar detect work to wiphy work Move the radar detect work to wiphy work in order to lock the wiphy for it without doing it manually. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 5 +++-- net/mac80211/main.c | 9 +++++---- net/mac80211/util.c | 7 +++---- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0d9aff6a4fc8..9be569ade9f7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1412,7 +1412,7 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; - struct work_struct radar_detected_work; + struct wiphy_work radar_detected_work; /* number of RX chains the hardware has */ u8 rx_chains; @@ -2571,7 +2571,8 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); -void ieee80211_dfs_radar_detected_work(struct work_struct *work); +void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, + struct wiphy_work *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index aeb21cfe789a..b3c3b031b5b8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -338,7 +338,6 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for scan work complete */ flush_workqueue(local->workqueue); flush_work(&local->sched_scan_stopped_work); - flush_work(&local->radar_detected_work); rtnl_lock(); /* we might do interface manipulations, so need both */ @@ -814,8 +813,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, INIT_WORK(&local->restart_work, ieee80211_restart_work); - INIT_WORK(&local->radar_detected_work, - ieee80211_dfs_radar_detected_work); + wiphy_work_init(&local->radar_detected_work, + ieee80211_dfs_radar_detected_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; @@ -1483,13 +1482,15 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) */ ieee80211_remove_interfaces(local); + wiphy_lock(local->hw.wiphy); + wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); + wiphy_unlock(local->hw.wiphy); rtnl_unlock(); cancel_delayed_work_sync(&local->roc_work); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); flush_work(&local->sched_scan_stopped_work); - flush_work(&local->radar_detected_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ff99aee46656..701f5b4eadae 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4358,7 +4358,8 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) mutex_unlock(&local->mtx); } -void ieee80211_dfs_radar_detected_work(struct work_struct *work) +void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); @@ -4376,9 +4377,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) } mutex_unlock(&local->chanctx_mtx); - wiphy_lock(local->hw.wiphy); ieee80211_dfs_cac_cancel(local); - wiphy_unlock(local->hw.wiphy); if (num_chanctx > 1) /* XXX: multi-channel is not supported yet */ @@ -4393,7 +4392,7 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw) trace_api_radar_detected(local); - schedule_work(&local->radar_detected_work); + 
wiphy_work_queue(hw->wiphy, &local->radar_detected_work); } EXPORT_SYMBOL(ieee80211_radar_detected); -- cgit v1.2.3 From 201712512cbbda360f62c222a4bab260350462a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:39 +0200 Subject: wifi: mac80211: move scan work to wiphy work Move the scan work to wiphy work, which also simplifies the way we handle the work vs. the scan configuration. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/iface.c | 2 +- net/mac80211/main.c | 4 +--- net/mac80211/scan.c | 29 ++++++++++++----------------- net/mac80211/util.c | 4 ++-- 5 files changed, 18 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9be569ade9f7..72955758a846 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1496,7 +1496,7 @@ struct ieee80211_local { unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; - struct delayed_work scan_work; + struct wiphy_delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; /* For backward compatibility only -- do not use */ struct cfg80211_chan_def _oper_chandef; @@ -1935,7 +1935,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed); /* scan/BSS handling */ -void ieee80211_scan_work(struct work_struct *work); +void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a8c08424c015..42e130a6aee9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -690,7 +690,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_recalc_ps(local); if (cancel_scan) - flush_delayed_work(&local->scan_work); + wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work); if (local->open_count == 0) { ieee80211_stop_device(local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b3c3b031b5b8..b6c089648441 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -335,9 +335,7 @@ static void ieee80211_restart_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata; int ret; - /* wait for scan work complete */ flush_workqueue(local->workqueue); - flush_work(&local->sched_scan_stopped_work); rtnl_lock(); /* we might do interface manipulations, so need both */ @@ -809,7 +807,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); + wiphy_delayed_work_init(&local->scan_work, ieee80211_scan_work); INIT_WORK(&local->restart_work, ieee80211_restart_work); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0805aa8603c6..2117cb2a916a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -274,8 +274,8 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) * the beacon/proberesp rx gives us an opportunity to upgrade * to active scan */ - set_bit(SCAN_BEACON_DONE, &local->scanning); - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + set_bit(SCAN_BEACON_DONE, &local->scanning); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } if (ieee80211_is_probe_resp(mgmt->frame_control)) { @@ -505,7 +505,7 @@ void ieee80211_scan_completed(struct 
ieee80211_hw *hw, memcpy(&local->scan_info, info, sizeof(*info)); - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } EXPORT_SYMBOL(ieee80211_scan_completed); @@ -545,8 +545,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, /* We need to set power level at maximum rate for scanning. */ ieee80211_hw_config(local, 0); - ieee80211_queue_delayed_work(&local->hw, - &local->scan_work, 0); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); return 0; } @@ -603,8 +602,8 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local) lockdep_is_held(&local->mtx)))) return; - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, - round_jiffies_relative(0)); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, + round_jiffies_relative(0)); } static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, @@ -795,8 +794,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, } /* Now, just wait a bit and we are all done! */ - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, - next_delay); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, + next_delay); return 0; } else { /* Do normal software scan */ @@ -1043,7 +1042,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, local->next_scan_state = SCAN_SET_CHANNEL; } -void ieee80211_scan_work(struct work_struct *work) +void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); @@ -1137,7 +1136,8 @@ void ieee80211_scan_work(struct work_struct *work) } } while (next_delay == 0); - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, + next_delay); goto out; out_complete: @@ -1280,12 +1280,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) goto out; } - /* - * If the work is currently running, it must be blocked on - * the mutex, but we'll set scan_sdata = NULL and it'll - * simply exit once it acquires the mutex. - */ - cancel_delayed_work(&local->scan_work); + wiphy_delayed_work_cancel(local->hw.wiphy, &local->scan_work); /* and clean up */ memset(&local->scan_info, 0, sizeof(local->scan_info)); __ieee80211_scan_completed(&local->hw, true); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 701f5b4eadae..2815f54c5aa1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2341,8 +2341,8 @@ static void ieee80211_flush_completed_scan(struct ieee80211_local *local, */ if (aborted) set_bit(SCAN_ABORTED, &local->scanning); - ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); - flush_delayed_work(&local->scan_work); + wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); + wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work); } } -- cgit v1.2.3 From ac2f7d6f2765a10b5075e0024706df7e845e7890 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:40 +0200 Subject: wifi: mac80211: move monitor work to wiphy work Again this serves to simplify the locking in mac80211 in the future, since this is a relatively complex work. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 72955758a846..1cb29a67a2c7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -473,7 +473,7 @@ struct ieee80211_if_managed { struct timer_list timer; struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; - struct work_struct monitor_work; + struct wiphy_work monitor_work; struct wiphy_work beacon_connection_loss_work; struct wiphy_work csa_connection_drop_work; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5644e25ec5fe..ab0be5c30860 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6683,10 +6683,11 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) return; } - ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); + wiphy_work_queue(local->hw.wiphy, &sdata->u.mgd.monitor_work); } -static void ieee80211_sta_monitor_work(struct work_struct *work) +static void ieee80211_sta_monitor_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -6702,8 +6703,8 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) /* let's probe the connection once */ if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.monitor_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.monitor_work); } } @@ -6821,7 +6822,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); + wiphy_work_init(&ifmgd->monitor_work, ieee80211_sta_monitor_work); wiphy_work_init(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); wiphy_work_init(&ifmgd->csa_connection_drop_work, @@ -7850,7 +7851,8 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) * they will not do anything but might not have been * cancelled when disconnecting. */ - cancel_work_sync(&ifmgd->monitor_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifmgd->monitor_work); wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->beacon_connection_loss_work); wiphy_work_cancel(sdata->local->hw.wiphy, -- cgit v1.2.3 From 730538edc8e0eb14b02708f65100a0deaf43e6cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:41 +0200 Subject: wifi: mac80211: lock wiphy in IP address notifier Lock the wiphy in the IP address notifier as another place that should have it locked before calling into the driver. This needs a bit of attention since the notifier can be called while the wiphy is already locked, when we remove an interface. Handle this by not running the notifier in this case, and instead calling out to the driver directly. 
Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 14 ++++++++++++++ net/mac80211/main.c | 22 +++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 42e130a6aee9..c0539e78a34e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2284,6 +2284,20 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &unreg_list, list) { bool netdev = sdata->dev; + /* + * Remove IP addresses explicitly, since the notifier will + * skip the callbacks if wdev->registered is false, since + * we can't acquire the wiphy_lock() again there if already + * inside this locked section. + */ + sdata_lock(sdata); + sdata->vif.cfg.arp_addr_cnt = 0; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_ARP_FILTER); + sdata_unlock(sdata); + list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b6c089648441..f6860889fbdd 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -437,7 +437,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, if (!wdev) return NOTIFY_DONE; - if (wdev->wiphy != local->hw.wiphy) + if (wdev->wiphy != local->hw.wiphy || !wdev->registered) return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(ndev); @@ -452,6 +452,25 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; + + /* + * The nested here is needed to convince lockdep that this is + * all OK. Yes, we lock the wiphy mutex here while we already + * hold the notifier rwsem, that's the normal case. And yes, + * we also acquire the notifier rwsem again when unregistering + * a netdev while we already hold the wiphy mutex, so it does + * look like a typical ABBA deadlock. + * + * However, both of these things happen with the RTNL held + * already. Therefore, they can't actually happen, since the + * lock orders really are ABC and ACB, which is fine due to + * the RTNL (A). + * + * We still need to prevent recursion, which is accomplished + * by the !wdev->registered check above. + */ + mutex_lock_nested(&local->hw.wiphy->mtx, 1); + __acquire(&local->hw.wiphy->mtx); sdata_lock(sdata); /* Copy the addresses to the vif config list */ @@ -470,6 +489,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); sdata_unlock(sdata); + wiphy_unlock(local->hw.wiphy); return NOTIFY_OK; } -- cgit v1.2.3 From 97c19e42b264e6b71a9ff9deea04c19f621805b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:42 +0200 Subject: wifi: mac80211: move offchannel works to wiphy work Make the offchannel works wiphy works to have the wiphy locked for executing them. 
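The delayed variant follows the same pattern; a minimal sketch with made-up "roc_example" names, only to show how the container_of() and the init/queue/cancel calls line up with the conversions in this patch:

struct roc_example {
	struct ieee80211_local *local;
	struct wiphy_delayed_work dwork;
};

static void roc_example_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	/* the callback type is the same as for a plain wiphy work; the
	 * delayed work embeds one, hence the ".work" in container_of()
	 */
	struct roc_example *ex = container_of(work, struct roc_example,
					      dwork.work);

	lockdep_assert_wiphy(wiphy);
	/* ... may requeue itself via wiphy_delayed_work_queue(), as the
	 * ROC and scan works do, using ex->local as before ...
	 */
}

static void roc_example_setup(struct roc_example *ex)
{
	struct wiphy *wiphy = ex->local->hw.wiphy;

	wiphy_delayed_work_init(&ex->dwork, roc_example_work);
	/* replaces ieee80211_queue_delayed_work()/mod_delayed_work() */
	wiphy_delayed_work_queue(wiphy, &ex->dwork, msecs_to_jiffies(500));
}

static void roc_example_teardown(struct roc_example *ex)
{
	struct wiphy *wiphy = ex->local->hw.wiphy;

	/* replaces cancel_delayed_work(_sync)(); correspondingly,
	 * wiphy_delayed_work_flush() replaces flush_delayed_work()
	 */
	wiphy_delayed_work_cancel(wiphy, &ex->dwork);
}
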
Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/main.c | 6 +++--- net/mac80211/offchannel.c | 36 ++++++++++++++++++------------------ 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1cb29a67a2c7..6d789087b453 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1589,9 +1589,9 @@ struct ieee80211_local { /* * Remain-on-channel support */ - struct delayed_work roc_work; + struct wiphy_delayed_work roc_work; struct list_head roc_list; - struct work_struct hw_roc_start, hw_roc_done; + struct wiphy_work hw_roc_start, hw_roc_done; unsigned long hw_roc_start_time; u64 roc_cookie_counter; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f6860889fbdd..e686b0cc2cd8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -377,8 +377,8 @@ static void ieee80211_restart_work(struct work_struct *work) ieee80211_scan_cancel(local); /* make sure any new ROC will consider local->in_reconfig */ - flush_delayed_work(&local->roc_work); - flush_work(&local->hw_roc_done); + wiphy_delayed_work_flush(local->hw.wiphy, &local->roc_work); + wiphy_work_flush(local->hw.wiphy, &local->hw_roc_done); /* wait for all packet processing to be done */ synchronize_net(); @@ -1501,11 +1501,11 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); wiphy_lock(local->hw.wiphy); + wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); wiphy_unlock(local->hw.wiphy); rtnl_unlock(); - cancel_delayed_work_sync(&local->roc_work); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); flush_work(&local->sched_scan_stopped_work); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cdf991e74ab9..5bedd9cef414 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -230,7 +230,7 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local, if (dur == LONG_MAX) return false; - mod_delayed_work(local->workqueue, &local->roc_work, dur); + wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, dur); return true; } @@ -258,7 +258,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, roc->notified = true; } -static void ieee80211_hw_roc_start(struct work_struct *work) +static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_start); @@ -285,7 +285,7 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw) trace_api_ready_on_channel(local); - ieee80211_queue_work(hw, &local->hw_roc_start); + wiphy_work_queue(hw->wiphy, &local->hw_roc_start); } EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); @@ -338,7 +338,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) tmp->started = true; tmp->abort = true; } - ieee80211_queue_work(&local->hw, &local->hw_roc_done); + wiphy_work_queue(local->hw.wiphy, &local->hw_roc_done); return; } @@ -368,8 +368,8 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) ieee80211_hw_config(local, 0); } - ieee80211_queue_delayed_work(&local->hw, &local->roc_work, - msecs_to_jiffies(min_dur)); + wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, + msecs_to_jiffies(min_dur)); /* tell userspace or send frame(s) */ list_for_each_entry(tmp, &local->roc_list, list) { @@ -407,8 +407,8 @@ void 
ieee80211_start_next_roc(struct ieee80211_local *local) _ieee80211_start_next_roc(local); } else { /* delay it a bit */ - ieee80211_queue_delayed_work(&local->hw, &local->roc_work, - round_jiffies_relative(HZ/2)); + wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, + round_jiffies_relative(HZ / 2)); } } @@ -451,7 +451,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local) } } -static void ieee80211_roc_work(struct work_struct *work) +static void ieee80211_roc_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, roc_work.work); @@ -461,7 +461,7 @@ static void ieee80211_roc_work(struct work_struct *work) mutex_unlock(&local->mtx); } -static void ieee80211_hw_roc_done(struct work_struct *work) +static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_done); @@ -482,7 +482,7 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) trace_api_remain_on_channel_expired(local); - ieee80211_queue_work(hw, &local->hw_roc_done); + wiphy_work_queue(hw->wiphy, &local->hw_roc_done); } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); @@ -586,8 +586,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* if not HW assist, just queue & schedule work */ if (!local->ops->remain_on_channel) { list_add_tail(&roc->list, &local->roc_list); - ieee80211_queue_delayed_work(&local->hw, - &local->roc_work, 0); + wiphy_delayed_work_queue(local->hw.wiphy, + &local->roc_work, 0); } else { /* otherwise actually kick it off here * (for error handling) @@ -695,7 +695,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, if (!cookie) return -ENOENT; - flush_work(&local->hw_roc_start); + wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start); mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { @@ -745,7 +745,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } else { /* go through work struct to return to the operating channel */ found->abort = true; - mod_delayed_work(local->workqueue, &local->roc_work, 0); + wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, 0); } out_unlock: @@ -994,9 +994,9 @@ int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, void ieee80211_roc_setup(struct ieee80211_local *local) { - INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); - INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); - INIT_DELAYED_WORK(&local->roc_work, ieee80211_roc_work); + wiphy_work_init(&local->hw_roc_start, ieee80211_hw_roc_start); + wiphy_work_init(&local->hw_roc_done, ieee80211_hw_roc_done); + wiphy_delayed_work_init(&local->roc_work, ieee80211_roc_work); INIT_LIST_HEAD(&local->roc_list); } -- cgit v1.2.3 From 7206a948715414371a285db8f1aab050f7a3941e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:43 +0200 Subject: wifi: mac80211: move link activation work to wiphy work We want to have the wiphy locked for these as well, so move it to be a wiphy work. 
Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 10 ++++++---- net/mac80211/link.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6d789087b453..3db1accb0903 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1112,7 +1112,7 @@ struct ieee80211_sub_if_data { struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; /* for ieee80211_set_active_links_async() */ - struct work_struct activate_links_work; + struct wiphy_work activate_links_work; u16 desired_active_links; #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c0539e78a34e..fa4514642d74 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -749,9 +749,9 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_stop_mbssid(sdata); } - cancel_work_sync(&sdata->activate_links_work); - wiphy_lock(sdata->local->hw.wiphy); + wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work); + ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -1691,7 +1691,8 @@ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work) } } -static void ieee80211_activate_links_work(struct work_struct *work) +static void ieee80211_activate_links_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -1736,7 +1737,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); wiphy_work_init(&sdata->work, ieee80211_iface_work); - INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); + wiphy_work_init(&sdata->activate_links_work, + ieee80211_activate_links_work); switch (type) { case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 748d222e8d3d..bcff8a909405 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -512,6 +512,6 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, return; sdata->desired_active_links = active_links; - schedule_work(&sdata->activate_links_work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->activate_links_work); } EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async); -- cgit v1.2.3 From 9fa659f9f4a2af348f3075f539dde3ceeb9fc9b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:44 +0200 Subject: wifi: mac80211: move dynamic PS to wiphy work Along with everything else, move the dynamic PS work to be a wiphy work, to simplify locking later. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 10 ++++++---- net/mac80211/iface.c | 2 +- net/mac80211/main.c | 8 ++++---- net/mac80211/mlme.c | 13 ++++++++----- net/mac80211/offchannel.c | 2 +- net/mac80211/pm.c | 4 ++-- net/mac80211/tx.c | 4 ++-- 7 files changed, 24 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3db1accb0903..96cecc3d71f0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1560,8 +1560,8 @@ struct ieee80211_local { * interface (and monitors) in PS, this then points there. 
*/ struct ieee80211_sub_if_data *ps_sdata; - struct work_struct dynamic_ps_enable_work; - struct work_struct dynamic_ps_disable_work; + struct wiphy_work dynamic_ps_enable_work; + struct wiphy_work dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block ifa_notifier; struct notifier_block ifa6_notifier; @@ -2348,8 +2348,10 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } -void ieee80211_dynamic_ps_enable_work(struct work_struct *work); -void ieee80211_dynamic_ps_disable_work(struct work_struct *work); +void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, + struct wiphy_work *work); +void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, + struct wiphy_work *work); void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fa4514642d74..6dc6cdc1cd37 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -516,7 +516,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do } del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); sdata_lock(sdata); WARN(ieee80211_vif_is_mld(&sdata->vif), diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e686b0cc2cd8..7c165e78115c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -837,10 +837,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; - INIT_WORK(&local->dynamic_ps_enable_work, - ieee80211_dynamic_ps_enable_work); - INIT_WORK(&local->dynamic_ps_disable_work, - ieee80211_dynamic_ps_disable_work); + wiphy_work_init(&local->dynamic_ps_enable_work, + ieee80211_dynamic_ps_enable_work); + wiphy_work_init(&local->dynamic_ps_disable_work, + ieee80211_dynamic_ps_disable_work); timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0); INIT_WORK(&local->sched_scan_stopped_work, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ab0be5c30860..f36279e8792e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2211,7 +2211,8 @@ static void ieee80211_change_ps(struct ieee80211_local *local) conf->flags &= ~IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, + &local->dynamic_ps_enable_work); } } @@ -2308,7 +2309,8 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata) } } -void ieee80211_dynamic_ps_disable_work(struct work_struct *work) +void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, @@ -2325,7 +2327,8 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) false); } -void ieee80211_dynamic_ps_enable_work(struct work_struct *work) +void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, @@ -2398,7 +2401,7 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t) { struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer); - ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); + 
wiphy_work_queue(local->hw.wiphy, &local->dynamic_ps_enable_work); } void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) @@ -3002,7 +3005,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL; del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); /* Disable ARP filtering */ if (sdata->vif.cfg.arp_addr_cnt) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 5bedd9cef414..df68d9838f87 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -34,7 +34,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) del_timer_sync(&ifmgd->bcn_mon_timer); del_timer_sync(&ifmgd->conn_mon_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); if (local->hw.conf.flags & IEEE80211_CONF_PS) { offchannel_ps_enabled = true; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 0ccb5701c7f3..10eb72b9b994 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2021, 2023 Intel Corporation */ #include #include @@ -76,7 +76,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * Note that this particular timer doesn't need to be * restarted at resume. */ - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); del_timer_sync(&local->dynamic_ps_timer); local->wowlan = wowlan; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ae33f727c6a8..5cff936c6211 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -266,8 +266,8 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) IEEE80211_QUEUE_STOP_REASON_PS, false); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_disable_work); + wiphy_work_queue(local->hw.wiphy, + &local->dynamic_ps_disable_work); } /* Don't restart the timer if we're not disassociated */ -- cgit v1.2.3 From eadfb54756aea5610d8d0a467f66305f777c85dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:45 +0200 Subject: wifi: mac80211: move sched-scan stop work to wiphy work This also has the wiphy locked here then. We need to use the _locked version of cfg80211_sched_scan_stopped() now, which also fixes an old deadlock there. 
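Why the _locked variant is required can be seen from the approximate shape of the two cfg80211 helpers sketched below; this is an illustration of their relationship, not the exact net/wireless implementation. A wiphy work runs with the wiphy mutex already held, so calling the unlocked helper, which takes that mutex itself, would recurse on the lock:

void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid)
{
	lockdep_assert_wiphy(wiphy);

	/* ... tear down the scheduled scan request and notify userspace ... */
}

void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid)
{
	/* unlocked variant: takes the wiphy mutex itself, so it must not
	 * be called from a context that already holds it, such as a
	 * wiphy work
	 */
	wiphy_lock(wiphy);
	cfg80211_sched_scan_stopped_locked(wiphy, reqid);
	wiphy_unlock(wiphy);
}
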
Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 5 +++-- net/mac80211/main.c | 6 +++--- net/mac80211/scan.c | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 96cecc3d71f0..47040a3a103b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1489,7 +1489,7 @@ struct ieee80211_local { int hw_scan_ies_bufsize; struct cfg80211_scan_info scan_info; - struct work_struct sched_scan_stopped_work; + struct wiphy_work sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; struct cfg80211_sched_scan_request __rcu *sched_scan_req; u8 scan_addr[ETH_ALEN]; @@ -1968,7 +1968,8 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_local *local); void ieee80211_sched_scan_end(struct ieee80211_local *local); -void ieee80211_sched_scan_stopped_work(struct work_struct *work); +void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy, + struct wiphy_work *work); /* off-channel/mgmt-tx */ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7c165e78115c..ba5382efa950 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -843,8 +843,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_dynamic_ps_disable_work); timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0); - INIT_WORK(&local->sched_scan_stopped_work, - ieee80211_sched_scan_stopped_work); + wiphy_work_init(&local->sched_scan_stopped_work, + ieee80211_sched_scan_stopped_work); spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); @@ -1502,13 +1502,13 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_lock(local->hw.wiphy); wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); + wiphy_work_cancel(local->hw.wiphy, &local->sched_scan_stopped_work); wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); wiphy_unlock(local->hw.wiphy); rtnl_unlock(); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); - flush_work(&local->sched_scan_stopped_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 2117cb2a916a..68ec2124c3db 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1422,10 +1422,11 @@ void ieee80211_sched_scan_end(struct ieee80211_local *local) mutex_unlock(&local->mtx); - cfg80211_sched_scan_stopped(local->hw.wiphy, 0); + cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); } -void ieee80211_sched_scan_stopped_work(struct work_struct *work) +void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, @@ -1448,6 +1449,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) if (local->in_reconfig) return; - schedule_work(&local->sched_scan_stopped_work); + wiphy_work_queue(hw->wiphy, &local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped); -- cgit v1.2.3 From 777b26002b73127e81643d9286fadf3d41e0e477 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:46 +0200 Subject: wifi: mac80211: move TDLS work to wiphy 
work Again, to have the wiphy locked for it. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mlme.c | 7 ++++--- net/mac80211/tdls.c | 11 ++++++----- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 47040a3a103b..1dc49bbd35bf 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -536,7 +536,7 @@ struct ieee80211_if_managed { /* TDLS support */ u8 tdls_peer[ETH_ALEN] __aligned(2); - struct delayed_work tdls_peer_del_work; + struct wiphy_delayed_work tdls_peer_del_work; struct sk_buff *orig_teardown_skb; /* The original teardown skb */ struct sk_buff *teardown_skb; /* A copy to send through the AP */ spinlock_t teardown_lock; /* To lock changing teardown_skb */ @@ -2597,7 +2597,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *extra_ies, size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); -void ieee80211_tdls_peer_del_work(struct work_struct *wk); +void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk); int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f36279e8792e..f5544d94efcb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6830,8 +6830,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_beacon_connection_loss_work); wiphy_work_init(&ifmgd->csa_connection_drop_work, ieee80211_csa_connection_drop_work); - INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, - ieee80211_tdls_peer_del_work); + wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, + ieee80211_tdls_peer_del_work); wiphy_delayed_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0); @@ -7860,7 +7860,8 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) &ifmgd->beacon_connection_loss_work); wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &ifmgd->tdls_peer_del_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ml_reconf_work); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a4af3b7675ef..fafbcef49ec0 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -21,7 +21,7 @@ /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) -void ieee80211_tdls_peer_del_work(struct work_struct *wk) +void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local; @@ -1224,9 +1224,9 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, return ret; } - ieee80211_queue_delayed_work(&sdata->local->hw, - &sdata->u.mgd.tdls_peer_del_work, - TDLS_PEER_SETUP_TIMEOUT); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.tdls_peer_del_work, + TDLS_PEER_SETUP_TIMEOUT); return 0; out_unlock: @@ -1526,7 +1526,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, } if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { - cancel_delayed_work(&sdata->u.mgd.tdls_peer_del_work); + 
wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.tdls_peer_del_work); eth_zero_addr(sdata->u.mgd.tdls_peer); } -- cgit v1.2.3 From e3208fb739e522fcae7cb8342ac82ebb45d32a2b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:47 +0200 Subject: wifi: mac80211: move key tailroom work to wiphy work This way we hold the wiphy mutex there, as a step towards removing some of the additional locks we have. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 4 ++-- net/mac80211/key.c | 11 +++++++---- net/mac80211/key.h | 5 +++-- net/mac80211/main.c | 3 ++- net/mac80211/pm.c | 3 ++- 6 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1dc49bbd35bf..c5a414b1fd0a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1041,7 +1041,7 @@ struct ieee80211_sub_if_data { /* count for keys needing tailroom space allocation */ int crypto_tx_tailroom_needed_cnt; int crypto_tx_tailroom_pending_dec; - struct delayed_work dec_tailroom_needed_wk; + struct wiphy_delayed_work dec_tailroom_needed_wk; struct net_device *dev; struct ieee80211_local *local; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6dc6cdc1cd37..fbfb9037b523 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2149,8 +2149,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, - ieee80211_delayed_tailroom_dec); + wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, + ieee80211_delayed_tailroom_dec); for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 13050dc9321f..fbd9f9a9001c 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -775,8 +775,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key, if (delay_tailroom) { /* see ieee80211_delayed_tailroom_dec */ sdata->crypto_tx_tailroom_pending_dec++; - schedule_delayed_work(&sdata->dec_tailroom_needed_wk, - HZ/2); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->dec_tailroom_needed_wk, + HZ / 2); } else { decrease_tailroom_need_count(sdata, 1); } @@ -1122,7 +1123,8 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, struct ieee80211_key *key, *tmp; LIST_HEAD(keys); - cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); mutex_lock(&local->key_mtx); @@ -1193,7 +1195,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, mutex_unlock(&local->key_mtx); } -void ieee80211_delayed_tailroom_dec(struct work_struct *wk) +void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, + struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; diff --git a/net/mac80211/key.h b/net/mac80211/key.h index f3df97df4b72..231a069d2975 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -2,7 +2,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. 
- * Copyright (C) 2019, 2022 Intel Corporation + * Copyright (C) 2019, 2022-2023 Intel Corporation */ #ifndef IEEE80211_KEY_H @@ -174,6 +174,7 @@ int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, #define rcu_dereference_check_key_mtx(local, ref) \ rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx))) -void ieee80211_delayed_tailroom_dec(struct work_struct *wk); +void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, + struct wiphy_work *wk); #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ba5382efa950..a66cfdca42ef 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -372,7 +372,8 @@ static void ieee80211_restart_work(struct work_struct *work) sdata_unlock(sdata); } } - flush_delayed_work(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_flush(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); } ieee80211_scan_cancel(local); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 10eb72b9b994..e52dbf7d14fb 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -161,7 +161,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } - flush_delayed_work(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_flush(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } -- cgit v1.2.3 From aca40a5fa679708b9cc7d0de5255e6d6f4b9d2c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:48 +0200 Subject: wifi: mac80211: move tspec work to wiphy work One more work that will now execute with the wiphy locked, for future cleanups. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c5a414b1fd0a..34f528f7c13e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -550,7 +550,7 @@ struct ieee80211_if_managed { * on the BE queue, but there's a lot of VO traffic, we might * get stuck in a downgraded situation and flush takes forever. */ - struct delayed_work tx_tspec_wk; + struct wiphy_delayed_work tx_tspec_wk; /* Information elements from the last transmitted (Re)Association * Request frame. 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f5544d94efcb..a35769dac162 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2489,8 +2489,10 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) ac); tx_tspec->action = TX_TSPEC_ACTION_NONE; ret = true; - schedule_delayed_work(&ifmgd->tx_tspec_wk, - tx_tspec->time_slice_start + HZ - now + 1); + wiphy_delayed_work_queue(local->hw.wiphy, + &ifmgd->tx_tspec_wk, + tx_tspec->time_slice_start + + HZ - now + 1); break; case TX_TSPEC_ACTION_NONE: /* nothing now */ @@ -2508,7 +2510,8 @@ void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_QOS); } -static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work) +static void ieee80211_sta_handle_tspec_ac_params_wk(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata; @@ -3060,7 +3063,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* existing TX TSPEC sessions no longer exist */ memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); - cancel_delayed_work_sync(&ifmgd->tx_tspec_wk); + wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk); sdata->vif.bss_conf.pwr_reduction = 0; sdata->vif.bss_conf.tx_pwr_env_num = 0; @@ -3128,7 +3131,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, if (tx_tspec->downgraded) { tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE; - schedule_delayed_work(&ifmgd->tx_tspec_wk, 0); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &ifmgd->tx_tspec_wk, 0); } } @@ -3140,7 +3144,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, if (tx_tspec->consumed_tx_time >= tx_tspec->admitted_time) { tx_tspec->downgraded = true; tx_tspec->action = TX_TSPEC_ACTION_DOWNGRADE; - schedule_delayed_work(&ifmgd->tx_tspec_wk, 0); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &ifmgd->tx_tspec_wk, 0); } } @@ -6837,8 +6842,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0); timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0); timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); - INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk, - ieee80211_sta_handle_tspec_ac_params_wk); + wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, + ieee80211_sta_handle_tspec_ac_params_wk); ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; -- cgit v1.2.3 From a6add8bee6a166d4f15bbd231ce7b71ad88bb8db Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:49 +0200 Subject: wifi: mac80211: move filter reconfig to wiphy work This again is intended for future cleanups that are possible when mac80211 and drivers can assume the wiphy is locked. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 2 +- net/mac80211/main.c | 7 ++++--- net/mac80211/util.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 34f528f7c13e..a5569f9bb83f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1368,7 +1368,7 @@ struct ieee80211_local { spinlock_t filter_lock; /* used for uploading changed mc list */ - struct work_struct reconfig_filter; + struct wiphy_work reconfig_filter; /* aggregated multicast list */ struct netdev_hw_addr_list mc_list; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fbfb9037b523..82bb340ef4be 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -778,7 +778,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev) spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); - ieee80211_queue_work(&local->hw, &local->reconfig_filter); + wiphy_work_queue(local->hw.wiphy, &local->reconfig_filter); } /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a66cfdca42ef..37714dcf9f06 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -84,7 +84,8 @@ void ieee80211_configure_filter(struct ieee80211_local *local) local->filter_flags = new_flags & ~(1<<31); } -static void ieee80211_reconfig_filter(struct work_struct *work) +static void ieee80211_reconfig_filter(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, reconfig_filter); @@ -835,7 +836,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_work_init(&local->radar_detected_work, ieee80211_dfs_radar_detected_work); - INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; wiphy_work_init(&local->dynamic_ps_enable_work, @@ -1503,13 +1504,13 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_lock(local->hw.wiphy); wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); + wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); wiphy_work_cancel(local->hw.wiphy, &local->sched_scan_stopped_work); wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); wiphy_unlock(local->hw.wiphy); rtnl_unlock(); cancel_work_sync(&local->restart_work); - cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2815f54c5aa1..bb4c7dd03758 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2316,7 +2316,7 @@ void ieee80211_stop_device(struct ieee80211_local *local) ieee80211_led_radio(local, false); ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); - cancel_work_sync(&local->reconfig_filter); + wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); flush_workqueue(local->workqueue); wiphy_work_flush(local->hw.wiphy, NULL); -- cgit v1.2.3 From d7074be64a6c0ee7fa81288f182eb22a8127a26e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:50 +0200 Subject: wifi: mac80211: move CSA finalize to wiphy work This work should be made per link as well, and then will have cancellation issues. Moving it to a wiphy work already fixes those beforehand. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 ++++---- net/mac80211/chan.c | 4 ++-- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/iface.c | 2 +- net/mac80211/link.c | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 36e3515bf8d9..75fa3c90b1b4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3584,11 +3584,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; - ieee80211_queue_work(&iter->local->hw, - &iter->deflink.csa_finalize_work); + wiphy_work_queue(iter->local->hw.wiphy, + &iter->deflink.csa_finalize_work); } } - ieee80211_queue_work(&local->hw, &sdata->deflink.csa_finalize_work); + wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work); rcu_read_unlock(); } @@ -3716,7 +3716,7 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) } } -void ieee80211_csa_finalize_work(struct work_struct *work) +void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 68952752b599..f967ed9d2a3a 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1206,8 +1206,8 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: - ieee80211_queue_work(&sdata->local->hw, - &link->csa_finalize_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &link->csa_finalize_work); break; case NL80211_IFTYPE_STATION: wiphy_delayed_work_queue(sdata->local->hw.wiphy, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a5569f9bb83f..8d9ced518087 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -990,7 +990,7 @@ struct ieee80211_link_data { struct ieee80211_key __rcu *default_mgmt_key; struct ieee80211_key __rcu *default_beacon_key; - struct work_struct csa_finalize_work; + struct wiphy_work csa_finalize_work; bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ bool operating_11g_mode; @@ -1989,7 +1989,7 @@ int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); /* channel switch handling */ -void ieee80211_csa_finalize_work(struct work_struct *work); +void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 82bb340ef4be..3b419af7720c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -535,9 +535,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do mutex_unlock(&local->mtx); sdata_unlock(sdata); - cancel_work_sync(&sdata->deflink.csa_finalize_work); cancel_work_sync(&sdata->deflink.color_change_finalize_work); + wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->deflink.dfs_cac_timer_work); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index bcff8a909405..2f7e2fc60be3 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -37,8 +37,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link_conf->link_id = link_id; link_conf->vif = &sdata->vif; - 
INIT_WORK(&link->csa_finalize_work, - ieee80211_csa_finalize_work); + wiphy_work_init(&link->csa_finalize_work, + ieee80211_csa_finalize_work); INIT_WORK(&link->color_change_finalize_work, ieee80211_color_change_finalize_work); INIT_DELAYED_WORK(&link->color_collision_detect_work, -- cgit v1.2.3 From b38579aeb5b04fb34828843457f6d9d6fa3b79c3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:51 +0200 Subject: wifi: mac80211: move color change finalize to wiphy work Again this should be per link and will get cancellation issues, move it to a wiphy work. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 7 ++++--- net/mac80211/ieee80211_i.h | 5 +++-- net/mac80211/iface.c | 4 ++-- net/mac80211/link.c | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 75fa3c90b1b4..1fa389b67aaa 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4745,7 +4745,8 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) return 0; } -void ieee80211_color_change_finalize_work(struct work_struct *work) +void ieee80211_color_change_finalize_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -4786,8 +4787,8 @@ void ieee80211_color_change_finish(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - ieee80211_queue_work(&sdata->local->hw, - &sdata->deflink.color_change_finalize_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.color_change_finalize_work); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d9ced518087..8d53ab9732f9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -997,7 +997,7 @@ struct ieee80211_link_data { struct cfg80211_chan_def csa_chandef; - struct work_struct color_change_finalize_work; + struct wiphy_work color_change_finalize_work; struct delayed_work color_collision_detect_work; u64 color_bitmap; @@ -1994,7 +1994,8 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); /* color change handling */ -void ieee80211_color_change_finalize_work(struct work_struct *work); +void ieee80211_color_change_finalize_work(struct wiphy *wiphy, + struct wiphy_work *work); void ieee80211_color_collision_detection_work(struct work_struct *work); /* interface handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3b419af7720c..e09fec1507a6 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -535,9 +535,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do mutex_unlock(&local->mtx); sdata_unlock(sdata); - cancel_work_sync(&sdata->deflink.color_change_finalize_work); - wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); + wiphy_work_cancel(local->hw.wiphy, + &sdata->deflink.color_change_finalize_work); wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->deflink.dfs_cac_timer_work); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 2f7e2fc60be3..72b5000502a5 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -39,8 +39,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, wiphy_work_init(&link->csa_finalize_work, ieee80211_csa_finalize_work); - INIT_WORK(&link->color_change_finalize_work, - 
ieee80211_color_change_finalize_work); + wiphy_work_init(&link->color_change_finalize_work, + ieee80211_color_change_finalize_work); INIT_DELAYED_WORK(&link->color_collision_detect_work, ieee80211_color_collision_detection_work); INIT_LIST_HEAD(&link->assigned_chanctx_list); -- cgit v1.2.3 From 0e8185ce1ddebf9de43b1f0fa92bf6dbba6ffb86 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:52 +0200 Subject: wifi: mac80211: check wiphy mutex in ops Check that we hold the wiphy mutex in the ops when calling the driver, since we're now on our way to always hold it, and simplify the locking. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 20 +++++++++++++ net/mac80211/driver-ops.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) (limited to 'net') diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 376dae58b5a6..97043d732f2e 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -15,6 +15,7 @@ int drv_start(struct ieee80211_local *local) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(local->started)) return -EALREADY; @@ -35,6 +36,7 @@ int drv_start(struct ieee80211_local *local) void drv_stop(struct ieee80211_local *local) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(!local->started)) return; @@ -58,6 +60,7 @@ int drv_add_interface(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && @@ -82,6 +85,7 @@ int drv_change_interface(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -96,6 +100,7 @@ void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -116,6 +121,7 @@ int drv_sta_state(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -149,6 +155,7 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -190,6 +197,7 @@ int drv_conf_tx(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -223,6 +231,7 @@ u64 drv_get_tsf(struct ieee80211_local *local, u64 ret = -1ULL; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return ret; @@ -239,6 +248,7 @@ void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -254,6 +264,7 @@ void drv_offset_tsf(struct ieee80211_local *local, s64 offset) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -268,6 +279,7 @@ void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -286,6 +298,7 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, int ret = 0; might_sleep(); + 
lockdep_assert_wiphy(local->hw.wiphy); drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) @@ -314,6 +327,7 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) @@ -342,6 +356,7 @@ int drv_switch_vif_chanctx(struct ieee80211_local *local, int i; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->switch_vif_chanctx) return -EOPNOTSUPP; @@ -394,6 +409,7 @@ int drv_ampdu_action(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!sdata) return -EIO; @@ -418,6 +434,7 @@ void drv_link_info_changed(struct ieee80211_local *local, int link_id, u64 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED) && @@ -460,6 +477,7 @@ int drv_set_key(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -487,6 +505,7 @@ int drv_change_vif_links(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -534,6 +553,7 @@ int drv_change_sta_links(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d95ff2282f54..2fac7dc2eb9d 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -41,6 +41,7 @@ static inline void drv_sync_rx_queues(struct ieee80211_local *local, struct sta_info *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->sync_rx_queues) { trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta); @@ -96,6 +97,7 @@ static inline int drv_suspend(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_suspend(local); ret = local->ops->suspend(&local->hw, wowlan); @@ -108,6 +110,7 @@ static inline int drv_resume(struct ieee80211_local *local) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_resume(local); ret = local->ops->resume(&local->hw); @@ -119,6 +122,7 @@ static inline void drv_set_wakeup(struct ieee80211_local *local, bool enabled) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->set_wakeup) return; @@ -144,6 +148,7 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_config(local, changed); ret = local->ops->config(&local->hw, changed); @@ -156,6 +161,7 @@ static inline void drv_vif_cfg_changed(struct ieee80211_local *local, u64 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -195,6 +201,7 @@ static inline void drv_configure_filter(struct ieee80211_local *local, u64 multicast) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_configure_filter(local, changed_flags, total_flags, multicast); @@ -209,6 +216,7 @@ static inline void drv_config_iface_filter(struct ieee80211_local *local, unsigned int changed_flags) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_config_iface_filter(local, sdata, filter_flags, 
changed_flags); @@ -265,6 +273,7 @@ static inline int drv_hw_scan(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -279,6 +288,7 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -297,6 +307,7 @@ drv_sched_scan_start(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -314,6 +325,7 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -330,6 +342,7 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local, const u8 *mac_addr) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_sw_scan_start(local, sdata, mac_addr); if (local->ops->sw_scan_start) @@ -341,6 +354,7 @@ static inline void drv_sw_scan_complete(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_sw_scan_complete(local, sdata); if (local->ops->sw_scan_complete) @@ -354,6 +368,7 @@ static inline int drv_get_stats(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->get_stats) ret = local->ops->get_stats(&local->hw, stats); @@ -377,6 +392,7 @@ static inline int drv_set_frag_threshold(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_frag_threshold(local, value); if (local->ops->set_frag_threshold) @@ -391,6 +407,7 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_rts_threshold(local, value); if (local->ops->set_rts_threshold) @@ -404,6 +421,7 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local, { int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_coverage_class(local, value); if (local->ops->set_coverage_class) @@ -437,6 +455,7 @@ static inline int drv_sta_add(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -456,6 +475,7 @@ static inline void drv_sta_remove(struct ieee80211_local *local, struct ieee80211_sta *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -475,6 +495,7 @@ static inline void drv_link_add_debugfs(struct ieee80211_local *local, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -491,6 +512,7 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -507,6 +529,7 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -523,6 +546,7 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, struct sta_info 
*sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -572,6 +596,7 @@ static inline void drv_sta_statistics(struct ieee80211_local *local, struct station_info *sinfo) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -603,6 +628,7 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) int ret = 0; /* default unsupported op for less congestion */ might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_tx_last_beacon(local); if (local->ops->tx_last_beacon) @@ -621,6 +647,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_get_survey(local, idx, survey); @@ -635,6 +662,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, static inline void drv_rfkill_poll(struct ieee80211_local *local) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); @@ -647,6 +675,7 @@ static inline void drv_flush(struct ieee80211_local *local, struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata && !check_sdata_in_driver(sdata)) return; @@ -662,6 +691,7 @@ static inline void drv_flush_sta(struct ieee80211_local *local, struct sta_info *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata && !check_sdata_in_driver(sdata)) return; @@ -677,6 +707,7 @@ static inline void drv_channel_switch(struct ieee80211_local *local, struct ieee80211_channel_switch *ch_switch) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_channel_switch(local, sdata, ch_switch); local->ops->channel_switch(&local->hw, &sdata->vif, ch_switch); @@ -689,6 +720,7 @@ static inline int drv_set_antenna(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->set_antenna) ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); trace_drv_set_antenna(local, tx_ant, rx_ant, ret); @@ -700,6 +732,7 @@ static inline int drv_get_antenna(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->get_antenna) ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); @@ -715,6 +748,7 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, @@ -731,6 +765,7 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_cancel_remain_on_channel(local, sdata); ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif); @@ -745,6 +780,7 @@ static inline int drv_set_ringparam(struct ieee80211_local *local, int ret = -ENOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_ringparam(local, tx, rx); if (local->ops->set_ringparam) @@ -758,6 +794,7 @@ static inline void drv_get_ringparam(struct ieee80211_local *local, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max); if 
(local->ops->get_ringparam) @@ -770,6 +807,7 @@ static inline bool drv_tx_frames_pending(struct ieee80211_local *local) bool ret = false; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_tx_frames_pending(local); if (local->ops->tx_frames_pending) @@ -786,6 +824,7 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -804,6 +843,7 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct cfg80211_gtk_rekey_data *data) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -859,6 +899,7 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_prep_tx_info *info) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -876,6 +917,7 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local, struct ieee80211_prep_tx_info *info) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -893,6 +935,7 @@ drv_mgd_protect_tdls_discover(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -910,6 +953,7 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_add_chanctx(local, ctx); if (local->ops->add_chanctx) @@ -925,6 +969,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(!ctx->driver_present)) return; @@ -941,6 +986,7 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_change_chanctx(local, ctx, changed); if (local->ops->change_chanctx) { @@ -980,6 +1026,7 @@ static inline int drv_start_ap(struct ieee80211_local *local, drv_verify_link_exists(sdata, link_conf); might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -996,6 +1043,7 @@ static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_bss_conf *link_conf) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); /* make sure link_conf is protected */ drv_verify_link_exists(sdata, link_conf); @@ -1014,6 +1062,7 @@ drv_reconfig_complete(struct ieee80211_local *local, enum ieee80211_reconfig_type reconfig_type) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_reconfig_complete(local, reconfig_type); if (local->ops->reconfig_complete) @@ -1027,6 +1076,7 @@ drv_set_default_unicast_key(struct ieee80211_local *local, int key_idx) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1059,6 +1109,7 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->channel_switch_beacon) { trace_drv_channel_switch_beacon(local, sdata, chandef); @@ -1075,6 +1126,7 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1094,6 +1146,7 @@ 
drv_post_channel_switch(struct ieee80211_sub_if_data *sdata) int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1111,6 +1164,7 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1128,6 +1182,7 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1144,6 +1199,7 @@ static inline int drv_join_ibss(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1158,6 +1214,7 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1186,6 +1243,7 @@ static inline int drv_get_txpower(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->get_txpower) return -EOPNOTSUPP; @@ -1206,6 +1264,7 @@ drv_tdls_channel_switch(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1226,6 +1285,7 @@ drv_tdls_cancel_channel_switch(struct ieee80211_local *local, struct ieee80211_sta *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1292,6 +1352,7 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local, u32 ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1311,6 +1372,7 @@ static inline int drv_start_pmsr(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1330,6 +1392,7 @@ static inline void drv_abort_pmsr(struct ieee80211_local *local, trace_drv_abort_pmsr(local, sdata); might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1345,6 +1408,7 @@ static inline int drv_start_nan(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_start_nan(local, sdata, conf); @@ -1357,6 +1421,7 @@ static inline void drv_stop_nan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_stop_nan(local, sdata); @@ -1372,6 +1437,7 @@ static inline int drv_nan_change_conf(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->nan_change_conf) @@ -1392,6 +1458,7 @@ static inline int drv_add_nan_func(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->add_nan_func) @@ -1409,6 +1476,7 @@ static inline void drv_del_nan_func(struct ieee80211_local *local, u8 instance_id) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_del_nan_func(local, sdata, instance_id); @@ -1425,6 +1493,7 @@ static inline int drv_set_tid_config(struct ieee80211_local *local, int 
ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); ret = local->ops->set_tid_config(&local->hw, &sdata->vif, sta, tid_conf); trace_drv_return_int(local, ret); @@ -1439,6 +1508,7 @@ static inline int drv_reset_tid_config(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids); trace_drv_return_int(local, ret); @@ -1449,6 +1519,7 @@ static inline void drv_update_vif_offload(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->update_vif_offload) @@ -1466,6 +1537,7 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local, sdata = get_bss_sdata(sdata); might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1483,6 +1555,7 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, sdata = get_bss_sdata(sdata); might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1501,6 +1574,7 @@ static inline void drv_add_twt_setup(struct ieee80211_local *local, struct ieee80211_twt_params *twt_agrt; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1518,6 +1592,7 @@ static inline void drv_twt_teardown_request(struct ieee80211_local *local, u8 flowid) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; -- cgit v1.2.3 From 0320d68f568126f617a346f601a58254b55e6c31 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:53 +0200 Subject: wifi: cfg80211: reg: hold wiphy mutex for wdev iteration Since we will want to remove the wdev lock in the future, lock the wiphy here to iterate and check the flags. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f861d1d82b18..1cdaf273d775 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3575,14 +3575,17 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) struct wireless_dev *wdev; for_each_rdev(rdev) { + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); if (!(wdev->wiphy->regulatory_flags & flag)) { wdev_unlock(wdev); + wiphy_unlock(&rdev->wiphy); return false; } wdev_unlock(wdev); } + wiphy_unlock(&rdev->wiphy); } return true; -- cgit v1.2.3 From beb2df475b7cfcc3e107afb4dd2a031bc34fe416 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:54 +0200 Subject: wifi: cfg80211: sme: hold wiphy lock for wdev iteration Since we will want to remove the wdev lock in the future, lock the wiphy here to iterate and for checking the status of the connections. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 53ba46f85ceb..c271f30b58fa 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -703,6 +703,7 @@ static bool cfg80211_is_all_idle(void) * as chan dfs state, etc. 
*/ for_each_rdev(rdev) { + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); if (wdev->conn || wdev->connected || @@ -710,6 +711,7 @@ static bool cfg80211_is_all_idle(void) is_all_idle = false; wdev_unlock(wdev); } + wiphy_unlock(&rdev->wiphy); } return is_all_idle; -- cgit v1.2.3 From fa8809a519d85f498f43fadaf82e10831f20625a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:55 +0200 Subject: wifi: cfg80211: hold wiphy lock in cfg80211_any_wiphy_oper_chan() We have the RTNL here for the iteration, but we need to lock each wiphy separately as well for using its data. Hold the wiphy lock for all of the ones in the iteration. Note that this implies we cannot already hold the wiphy mutex for the wiphy passed by the argument, but that's true now. Signed-off-by: Johannes Berg --- net/wireless/chan.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 3c691ae904d1..b2469e2c1e70 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -824,13 +824,17 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, return false; for_each_rdev(rdev) { + bool found; + if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; - if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan)) - return true; + wiphy_lock(&rdev->wiphy); + found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || + cfg80211_offchan_chain_is_active(rdev, chan); + wiphy_unlock(&rdev->wiphy); - if (cfg80211_offchan_chain_is_active(rdev, chan)) + if (found) return true; } -- cgit v1.2.3 From 1474bc87fe57deac726cc10203f73daa6c3212f7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:56 +0200 Subject: wifi: cfg80211: check wiphy mutex is held for wdev mutex This might seem pretty pointless rather than changing the locking immediately, but it seems safer to run for a while with checks and the old locking scheme, and then remove the wdev lock later. Signed-off-by: Johannes Berg --- net/wireless/core.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index b9876b444e1b..5dc76ea3b84e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -238,6 +238,7 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { + lockdep_assert_held(&wdev->wiphy->mtx); mutex_lock(&wdev->mtx); __acquire(wdev->mtx); } @@ -245,11 +246,16 @@ static inline void wdev_lock(struct wireless_dev *wdev) static inline void wdev_unlock(struct wireless_dev *wdev) __releases(wdev) { + lockdep_assert_held(&wdev->wiphy->mtx); __release(wdev->mtx); mutex_unlock(&wdev->mtx); } -#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) +static inline void ASSERT_WDEV_LOCK(struct wireless_dev *wdev) +{ + lockdep_assert_held(&wdev->wiphy->mtx); + lockdep_assert_held(&wdev->mtx); +} static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { -- cgit v1.2.3 From 01ca280d323ef4a7e6732615b1faaa8877cc417f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:57 +0200 Subject: wifi: mac80211: ethtool: hold wiphy mutex We should hold the wiphy mutex here since we're going to call the driver and want to remove the sta_mtx. 
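For illustration only (a sketch distilled from the diff below, not part of the patch): the pattern being enforced is that anything ending up in the driver ops runs with the wiphy mutex held, so the lockdep_assert_wiphy() checks added earlier in this series are satisfied, roughly:

	wiphy_lock(local->hw.wiphy);

	/* ... collect the mac80211-maintained counters ... */

	/* calling into the driver now requires the wiphy mutex */
	drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN]));

	wiphy_unlock(local->hw.wiphy);
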
Signed-off-by: Johannes Berg --- net/mac80211/ethtool.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index a3830d925cc2..c53208321c8f 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -5,7 +5,7 @@ * Copied from cfg.c - originally * Copyright 2006-2010 Johannes Berg * Copyright 2014 Intel Corporation (Author: Johannes Berg) - * Copyright (C) 2018, 2022 Intel Corporation + * Copyright (C) 2018, 2022-2023 Intel Corporation */ #include #include @@ -102,6 +102,7 @@ static void ieee80211_get_stats(struct net_device *dev, * network device. */ + wiphy_lock(local->hw.wiphy); mutex_lock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -200,10 +201,13 @@ do_survey: mutex_unlock(&local->sta_mtx); - if (WARN_ON(i != STA_STATS_LEN)) + if (WARN_ON(i != STA_STATS_LEN)) { + wiphy_unlock(local->hw.wiphy); return; + } drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN])); + wiphy_unlock(local->hw.wiphy); } static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data) -- cgit v1.2.3 From 7a53b71d8ebc67b1a23b0cc8dd8b8024e3af9d27 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:58 +0200 Subject: wifi: mac80211: hold wiphy_lock around concurrency checks We want to replace the locking in mac80211 by just the wiphy mutex, so hold the lock here around concurrency checks for the future where the chanctx_mtx used inside goes away. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e09fec1507a6..c528a4bb7f4f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -430,12 +430,13 @@ static int ieee80211_open(struct net_device *dev) if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; + wiphy_lock(sdata->local->hw.wiphy); err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); if (err) - return err; + goto out; - wiphy_lock(sdata->local->hw.wiphy); err = ieee80211_do_open(&sdata->wdev, true); +out: wiphy_unlock(sdata->local->hw.wiphy); return err; -- cgit v1.2.3 From 332e68bc5526226f50a946b5dd980bba12902595 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 13:59:59 +0200 Subject: wifi: mac80211: extend wiphy lock in interface removal We want to extend the wiphy locking to the interface list, so move that into the section locked with the wiphy lock. 
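Purely as a sketch of the resulting ordering (not the patch itself), interface removal now takes the wiphy mutex before the interface list is spliced off, roughly:

	wiphy_lock(local->hw.wiphy);

	mutex_lock(&local->iflist_mtx);
	list_splice_init(&local->interfaces, &unreg_list);
	mutex_unlock(&local->iflist_mtx);

	/* ... tear down the interfaces now on unreg_list ... */

	wiphy_unlock(local->hw.wiphy);
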
Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c528a4bb7f4f..a465a18bf3bd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2274,6 +2274,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) */ cfg80211_shutdown_all_interfaces(local->hw.wiphy); + wiphy_lock(local->hw.wiphy); + WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); @@ -2283,7 +2285,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_splice_init(&local->interfaces, &unreg_list); mutex_unlock(&local->iflist_mtx); - wiphy_lock(local->hw.wiphy); list_for_each_entry_safe(sdata, tmp, &unreg_list, list) { bool netdev = sdata->dev; -- cgit v1.2.3 From a26787aa13974fb0b3fb42bfeb4256c1b686e305 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:00 +0200 Subject: wifi: mac80211: take wiphy lock for MAC addr change We want to ensure everything holds the wiphy lock, so also extend that to the MAC change callback. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a465a18bf3bd..343343a7a3e2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -251,9 +251,9 @@ unlock: return ret; } -static int ieee80211_change_mac(struct net_device *dev, void *addr) +static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata, + void *addr) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sockaddr *sa = addr; bool check_dup = true; @@ -278,7 +278,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) if (live) drv_remove_interface(local, sdata); - ret = eth_mac_addr(dev, sa); + ret = eth_mac_addr(sdata->dev, sa); if (ret == 0) { memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); @@ -294,6 +294,19 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) return ret; } +static int ieee80211_change_mac(struct net_device *dev, void *addr) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int ret; + + wiphy_lock(local->hw.wiphy); + ret = _ieee80211_change_mac(sdata, addr); + wiphy_unlock(local->hw.wiphy); + + return ret; +} + static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || -- cgit v1.2.3 From 4d3acf4311a0401e3e97c2f2302256cd9d7f5692 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:01 +0200 Subject: wifi: mac80211: remove sta_mtx We now hold the wiphy mutex everywhere that we use or needed the sta_mtx, so we don't need this mutex any more. Remove it. Most of this change was done automatically with spatch. 
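As an illustration (a sketch distilled from the diff below, not additional code), most of the conversion follows one mechanical pattern: lookups that used to be

	mutex_lock(&local->sta_mtx);
	sta = sta_info_get_bss(sdata, mac);
	if (sta)
		sta_set_sinfo(sta, sinfo, true);
	mutex_unlock(&local->sta_mtx);

now simply assert the wiphy mutex that the caller already holds:

	lockdep_assert_wiphy(local->hw.wiphy);

	sta = sta_info_get_bss(sdata, mac);
	if (sta)
		sta_set_sinfo(sta, sinfo, true);

and lockdep expressions such as lockdep_is_held(&local->sta_mtx) become lockdep_is_held(&local->hw.wiphy->mtx).
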
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 112 +++++++++++++++------------------------------ net/mac80211/debugfs.c | 5 +- net/mac80211/driver-ops.c | 4 +- net/mac80211/ethtool.c | 3 -- net/mac80211/ibss.c | 4 +- net/mac80211/ieee80211_i.h | 5 +- net/mac80211/iface.c | 8 +--- net/mac80211/key.c | 4 +- net/mac80211/link.c | 3 +- net/mac80211/mlme.c | 31 ++++--------- net/mac80211/pm.c | 6 +-- net/mac80211/rx.c | 6 +-- net/mac80211/s1g.c | 15 ++---- net/mac80211/sta_info.c | 81 ++++++++++++-------------------- net/mac80211/sta_info.h | 2 +- net/mac80211/tdls.c | 26 ++++------- net/mac80211/tx.c | 4 +- net/mac80211/util.c | 7 +-- 18 files changed, 112 insertions(+), 214 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1fa389b67aaa..473de1882ded 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -214,6 +214,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct sta_info *sta; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; @@ -235,12 +237,10 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (!ifmgd->associated) return 0; - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); - mutex_unlock(&local->sta_mtx); if (params->use_4addr) ieee80211_send_4addr_nullfunc(local, sdata); @@ -472,7 +472,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; struct ieee80211_key *key; - int err; + + lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; @@ -510,8 +511,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; - mutex_lock(&local->sta_mtx); - if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* @@ -526,8 +525,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, */ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); - err = -ENOENT; - goto out_unlock; + return -ENOENT; } } @@ -565,12 +563,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; } - err = ieee80211_key_link(key, link, sta); - - out_unlock: - mutex_unlock(&local->sta_mtx); - - return err; + return ieee80211_key_link(key, link, sta); } static struct ieee80211_key * @@ -598,7 +591,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { @@ -643,7 +636,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int ret; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + mutex_lock(&local->key_mtx); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); @@ -657,7 +651,6 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, ret = 0; out_unlock: mutex_unlock(&local->key_mtx); - mutex_unlock(&local->sta_mtx); return ret; } @@ -860,7 +853,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int ret = -ENOENT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = 
sta_info_get_by_idx(sdata, idx); if (sta) { @@ -869,8 +862,6 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo, true); } - mutex_unlock(&local->sta_mtx); - return ret; } @@ -890,7 +881,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int ret = -ENOENT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { @@ -898,8 +889,6 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo, true); } - mutex_unlock(&local->sta_mtx); - return ret; } @@ -1800,7 +1789,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* * If there are no changes, then accept a link that doesn't exist, @@ -2034,6 +2023,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; int err; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -2077,9 +2068,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. */ - mutex_lock(&local->sta_mtx); err = sta_apply_parameters(local, sta, params); - mutex_unlock(&local->sta_mtx); if (err) { sta_info_free(local, sta); return err; @@ -2122,13 +2111,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, enum cfg80211_station_type statype; int err; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); - if (!sta) { - err = -ENOENT; - goto out_err; - } + if (!sta) + return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: @@ -2158,22 +2145,19 @@ static int ieee80211_change_station(struct wiphy *wiphy, statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: - err = -EOPNOTSUPP; - goto out_err; + return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) - goto out_err; + return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { - if (vlansdata->u.vlan.sta) { - err = -EBUSY; - goto out_err; - } + if (vlansdata->u.vlan.sta) + return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); @@ -2208,9 +2192,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, err = sta_apply_parameters(local, sta, params); } if (err) - goto out_err; - - mutex_unlock(&local->sta_mtx); + return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { @@ -2219,9 +2201,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, } return 0; -out_err: - mutex_unlock(&local->sta_mtx); - return err; } #ifdef CONFIG_MAC80211_MESH @@ -4566,7 +4545,8 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int ret; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; @@ -4574,17 +4554,11 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy, if 
(!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get_bss(sdata, tid_conf->peer); - if (!sta) { - mutex_unlock(&sdata->local->sta_mtx); + if (!sta) return -ENOENT; - } - ret = drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); - mutex_unlock(&sdata->local->sta_mtx); - - return ret; + return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, @@ -4593,7 +4567,8 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int ret; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; @@ -4601,17 +4576,11 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get_bss(sdata, peer); - if (!sta) { - mutex_unlock(&sdata->local->sta_mtx); + if (!sta) return -ENOENT; - } - - ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); - mutex_unlock(&sdata->local->sta_mtx); - return ret; + return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, @@ -4937,13 +4906,10 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_add_link_station(local, sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_add_link_station(local, sdata, params); } static int sta_mod_link_station(struct ieee80211_local *local, @@ -4968,13 +4934,10 @@ ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_mod_link_station(local, sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_mod_link_station(local, sdata, params); } static int sta_del_link_station(struct ieee80211_sub_if_data *sdata, @@ -5003,13 +4966,10 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_del_link_station(sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_del_link_station(sdata, params); } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2efc6ccbfcf6..b575ae90e57f 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -288,10 +288,10 @@ static ssize_t aql_txq_limit_write(struct file *file, q_limit_low_old = local->aql_txq_limit_low[ac]; q_limit_high_old = local->aql_txq_limit_high[ac]; + wiphy_lock(local->hw.wiphy); local->aql_txq_limit_low[ac] = q_limit_low; local->aql_txq_limit_high[ac] = q_limit_high; - mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { /* If a sta has customized queue limits, keep it 
*/ if (sta->airtime[ac].aql_limit_low == q_limit_low_old && @@ -300,7 +300,8 @@ static ssize_t aql_txq_limit_write(struct file *file, sta->airtime[ac].aql_limit_high = q_limit_high; } } - mutex_unlock(&local->sta_mtx); + wiphy_unlock(local->hw.wiphy); + return count; } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 97043d732f2e..9fc110264808 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -569,7 +569,7 @@ int drv_change_sta_links(struct ieee80211_local *local, for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); ieee80211_link_sta_debugfs_drv_remove(link_sta); } @@ -585,7 +585,7 @@ int drv_change_sta_links(struct ieee80211_local *local, for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); ieee80211_link_sta_debugfs_drv_add(link_sta); } diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index c53208321c8f..9894d2024470 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -103,7 +103,6 @@ static void ieee80211_get_stats(struct net_device *dev, */ wiphy_lock(local->hw.wiphy); - mutex_lock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sta = sta_info_get_bss(sdata, sdata->deflink.u.mgd.bssid); @@ -199,8 +198,6 @@ do_survey: else data[i++] = -1LL; - mutex_unlock(&local->sta_mtx); - if (WARN_ON(i != STA_STATS_LEN)) { wiphy_unlock(local->hw.wiphy); return; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index e1900077bc4b..d68650cbd5ff 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1257,7 +1257,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); @@ -1282,8 +1282,6 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) WARN_ON(__sta_info_destroy(sta)); } } - - mutex_unlock(&local->sta_mtx); } /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d53ab9732f9..1bc921fcd52b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1435,10 +1435,9 @@ struct ieee80211_local { /* Station data */ /* - * The mutex only protects the list, hash table and - * counter, reads are done with RCU. + * The list, hash table and counter are protected + * by the wiphy mutex, reads are done with RCU. 
*/ - struct mutex sta_mtx; spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 343343a7a3e2..eb93caf0be87 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1503,12 +1503,13 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, { struct ieee80211_mgmt *mgmt = (void *)skb->data; + lockdep_assert_wiphy(local->hw.wiphy); + if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { struct sta_info *sta; int len = skb->len; - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { switch (mgmt->u.action.u.addba_req.action_code) { @@ -1529,7 +1530,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, break; } } - mutex_unlock(&local->sta_mtx); } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { switch (mgmt->u.action.u.vht_group_notif.action_code) { @@ -1543,7 +1543,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, band = status->band; opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) @@ -1551,7 +1550,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, &sta->deflink, opmode, band); - mutex_unlock(&local->sta_mtx); break; } case WLAN_VHT_ACTION_GROUPID_MGMT: @@ -1598,7 +1596,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, * a block-ack session was active. That cannot be * right, so terminate the session. */ - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { u16 tid = ieee80211_get_tid(hdr); @@ -1608,7 +1605,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, WLAN_REASON_QSTA_REQUIRE_SETUP, true); } - mutex_unlock(&local->sta_mtx); } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index fbd9f9a9001c..c5bbac4393ab 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -482,7 +482,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (sta) { link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sta->local->sta_mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) return -ENOLINK; } @@ -877,7 +877,7 @@ int ieee80211_key_link(struct ieee80211_key *key, if (link_id >= 0) { link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sta->local->sta_mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) { ret = -ENOLINK; goto out; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 72b5000502a5..71815b8d44af 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -455,7 +455,7 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) int ret; sdata_assert_lock(sdata); - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); mutex_lock(&local->mtx); mutex_lock(&local->key_mtx); old_active = sdata->vif.active_links; @@ -475,7 +475,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) } mutex_unlock(&local->key_mtx); mutex_unlock(&local->mtx); - mutex_unlock(&local->sta_mtx); return ret; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a35769dac162..4296168877e8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3207,11 +3207,10 @@ 
static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_send_count++; if (dst) { - mutex_lock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, dst); if (!WARN_ON(!sta)) ieee80211_check_fast_rx(sta); - mutex_unlock(&sdata->local->sta_mtx); } if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { @@ -3629,7 +3628,6 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ap_addr = ifmgd->auth_data->ap_addr; struct sta_info *sta; - bool result = true; sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; @@ -3638,22 +3636,18 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) run_again(sdata, ifmgd->auth_data->timeout); /* move station state to auth */ - mutex_lock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, ap_addr); if (!sta) { WARN_ONCE(1, "%s: STA %pM not found", sdata->name, ap_addr); - result = false; - goto out; + return false; } if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) { sdata_info(sdata, "failed moving %pM to auth\n", ap_addr); - result = false; - goto out; + return false; } -out: - mutex_unlock(&sdata->local->sta_mtx); - return result; + return true; } static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, @@ -5099,7 +5093,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, u16 valid_links = 0, dormant_links = 0; int err; - mutex_lock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * station info was already allocated and inserted before * the association and should be available to us @@ -5147,7 +5141,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, " (assoc)" : ""); link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (WARN_ON(!link_sta)) goto out_err; @@ -5234,8 +5228,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (sdata->wdev.use_4addr) drv_sta_set_4addr(local, sdata, &sta->sta, true); - mutex_unlock(&sdata->local->sta_mtx); - ieee80211_set_associated(sdata, assoc_data, changed); /* @@ -5255,7 +5247,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return true; out_err: eth_zero_addr(sdata->vif.cfg.ap_addr); - mutex_unlock(&sdata->local->sta_mtx); return false; } @@ -6142,16 +6133,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); if (WARN_ON(!sta)) { - mutex_unlock(&local->sta_mtx); goto free; } link_sta = rcu_dereference_protected(sta->link[link->link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (WARN_ON(!link_sta)) { - mutex_unlock(&local->sta_mtx); goto free; } @@ -6167,7 +6156,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, elems->vht_operation, elems->he_operation, elems->eht_operation, elems->s1g_oper, bssid, &changed)) { - mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", bssid); @@ -6185,7 +6173,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, ieee80211_vht_handle_opmode(sdata, link_sta, 
*elems->opmode_notif, rx_status->band); - mutex_unlock(&local->sta_mtx); changed |= ieee80211_handle_pwr_constr(link, chan, mgmt, elems->country_elem, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e52dbf7d14fb..c1fa26e09479 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -40,13 +40,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) && !(wowlan && wowlan->any)) { - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions( sta, AGG_STOP_LOCAL_REQUEST); } - mutex_unlock(&local->sta_mtx); } /* keep sched_scan only in case of 'any' trigger */ @@ -119,12 +118,11 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) local->quiescing = false; local->wowlan = false; if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } - mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e751cda5eef6..6fcd2a717922 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4669,7 +4669,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && @@ -4683,9 +4683,9 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + __ieee80211_check_fast_rx_iface(sdata); - mutex_unlock(&local->sta_mtx); } static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index c1f964e9991c..d4ed0c0a335c 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -2,6 +2,7 @@ /* * S1G handling * Copyright(c) 2020 Adapt-IP + * Copyright (C) 2023 Intel Corporation */ #include #include @@ -153,11 +154,11 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mgmt->sa); if (!sta) - goto out; + return; switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_SETUP: @@ -169,9 +170,6 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, default: break; } - -out: - mutex_unlock(&local->sta_mtx); } void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, @@ -181,11 +179,11 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mgmt->da); if (!sta) - goto out; + return; switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_SETUP: @@ -195,7 +193,4 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, default: break; } - -out: - mutex_unlock(&local->sta_mtx); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 
b68bf77b05d0..eee541251c48 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -88,7 +88,6 @@ static const struct rhashtable_params link_sta_rht_params = { .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; -/* Caller must hold local->sta_mtx */ static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { @@ -99,19 +98,19 @@ static int sta_info_hash_del(struct ieee80211_local *local, static int link_sta_info_hash_add(struct ieee80211_local *local, struct link_sta_info *link_sta) { - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return rhltable_insert(&local->link_sta_hash, - &link_sta->link_hash_node, - link_sta_rht_params); + &link_sta->link_hash_node, link_sta_rht_params); } static int link_sta_info_hash_del(struct ieee80211_local *local, struct link_sta_info *link_sta) { - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return rhltable_remove(&local->link_sta_hash, - &link_sta->link_hash_node, - link_sta_rht_params); + &link_sta->link_hash_node, link_sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -331,7 +330,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int i = 0; list_for_each_entry_rcu(sta, &local->sta_list, list, - lockdep_is_held(&local->sta_mtx)) { + lockdep_is_held(&local->hw.wiphy->mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { @@ -355,10 +354,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; - link_sta = rcu_access_pointer(sta->link[link_id]); - if (link_sta != &sta->deflink) - lockdep_assert_held(&sta->local->sta_mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); + link_sta = rcu_access_pointer(sta->link[link_id]); if (WARN_ON(!link_sta)) return; @@ -437,7 +435,6 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) kfree(sta); } -/* Caller must hold local->sta_mtx */ static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { @@ -717,6 +714,8 @@ static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL @@ -734,7 +733,6 @@ static int sta_info_insert_check(struct sta_info *sta) * for correctness. 
*/ rcu_read_lock(); - lockdep_assert_held(&sdata->local->sta_mtx); if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { rcu_read_unlock(); @@ -808,11 +806,6 @@ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) } } -/* - * should be called with sta_mtx locked - * this function replaces the mutex lock - * with a RCU lock - */ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; @@ -820,7 +813,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) struct station_info *sinfo = NULL; int err = 0; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { @@ -884,7 +877,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) struct link_sta_info *link_sta; link_sta = rcu_dereference_protected(sta->link[i], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) continue; @@ -906,7 +899,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* move reference to rcu-protected */ rcu_read_lock(); - mutex_unlock(&local->sta_mtx); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); @@ -922,7 +914,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) synchronize_net(); out_cleanup: cleanup_single_sta(sta); - mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); return err; @@ -934,13 +925,11 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) int err; might_sleep(); - - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); err = sta_info_insert_check(sta); if (err) { sta_info_free(local, sta); - mutex_unlock(&local->sta_mtx); rcu_read_lock(); return err; } @@ -1219,7 +1208,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) local = sta->local; sdata = sta->sdata; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * Before removing the station from the driver and @@ -1244,7 +1233,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) continue; link_sta = rcu_dereference_protected(sta->link[i], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); link_sta_info_hash_del(local, link_sta); } @@ -1398,7 +1387,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) */ might_sleep(); - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); @@ -1474,28 +1463,22 @@ int __must_check __sta_info_destroy(struct sta_info *sta) int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; - int ret; - mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get(sdata, addr); - ret = __sta_info_destroy(sta); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + sta = sta_info_get(sdata, addr); + return __sta_info_destroy(sta); } int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; - int ret; - mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get_bss(sdata, addr); - ret = __sta_info_destroy(sta); - mutex_unlock(&sdata->local->sta_mtx); + 
lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + sta = sta_info_get_bss(sdata, addr); + return __sta_info_destroy(sta); } static void sta_info_cleanup(struct timer_list *t) @@ -1535,7 +1518,6 @@ int sta_info_init(struct ieee80211_local *local) } spin_lock_init(&local->tim_lock); - mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); @@ -1558,11 +1540,11 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); WARN_ON(vlans && !sdata->bss); - mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (sdata == sta->sdata || (vlans && sdata->bss == sta->sdata->bss)) { @@ -1586,7 +1568,6 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) if (!support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sdata); } - mutex_unlock(&local->sta_mtx); return ret; } @@ -1597,7 +1578,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); @@ -1616,8 +1597,6 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, WARN_ON(__sta_info_destroy(sta)); } } - - mutex_unlock(&local->sta_mtx); } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, @@ -2872,7 +2851,7 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) struct sta_link_alloc *alloc; int ret; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* must represent an MLD from the start */ if (WARN_ON(!sta->sta.valid_links)) @@ -2901,7 +2880,7 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id) { - lockdep_assert_held(&sta->sdata->local->sta_mtx); + lockdep_assert_wiphy(sta->sdata->local->hw.wiphy); sta_remove_link(sta, link_id, false); } @@ -2915,7 +2894,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) int ret; link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sdata->local->sta_mtx)); + lockdep_is_held(&sdata->local->hw.wiphy->mtx)); if (WARN_ON(old_links == new_links || !link_sta)) return -EINVAL; @@ -2959,7 +2938,7 @@ void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id) struct ieee80211_sub_if_data *sdata = sta->sdata; u16 old_links = sta->sta.valid_links; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sta->sta.valid_links &= ~BIT(link_id); @@ -2998,7 +2977,7 @@ bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - return lockdep_is_held(&sta->local->sta_mtx); + return lockdep_is_held(&sta->local->hw.wiphy->mtx); } EXPORT_SYMBOL(lockdep_sta_mutex_held); #endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 1deab7e33a7c..c3cce280bc0f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -827,7 +827,7 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -/* user must hold sta_mtx 
or be in RCU critical section */ +/* user must hold wiphy mutex or be in RCU critical section */ struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index fafbcef49ec0..e7bf25e3be07 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1479,10 +1479,9 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, peer); if (!sta) { - mutex_unlock(&local->sta_mtx); ret = -ENOLINK; break; } @@ -1491,7 +1490,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, iee80211_tdls_recalc_ht_protection(sdata, sta); set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); - mutex_unlock(&local->sta_mtx); WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) || !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)); @@ -1514,9 +1512,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, ret = sta_info_destroy_addr(sdata, peer); - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); iee80211_tdls_recalc_ht_protection(sdata, NULL); - mutex_unlock(&local->sta_mtx); iee80211_tdls_recalc_chanctx(sdata, NULL); break; @@ -1674,7 +1671,7 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, /* this may work, but is untested */ return -EOPNOTSUPP; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, @@ -1704,7 +1701,6 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, set_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); out: - mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(skb); return ret; } @@ -1718,26 +1714,24 @@ ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, "Invalid TDLS peer %pM for channel switch cancel\n", addr); - goto out; + return; } if (!test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { tdls_dbg(sdata, "TDLS channel switch not initiated by %pM\n", addr); - goto out; + return; } drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); - -out: - mutex_unlock(&local->sta_mtx); } static struct sk_buff * @@ -1808,7 +1802,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", @@ -1871,7 +1865,6 @@ call_drv: tf->sa, params.status); out: - mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); kfree(elems); return ret; @@ -1985,7 +1978,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, goto free; } - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", @@ -2032,7 +2025,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, tf->sa, params.chandef->chan->center_freq, params.chandef->width); out: - mutex_unlock(&local->sta_mtx); 
dev_kfree_skb_any(params.tmpl_skb); free: kfree(elems); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5cff936c6211..1ff7d2368c32 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5927,7 +5927,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) int ret; u32 queues; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* only some cases are supported right now */ switch (sdata->vif.type) { @@ -5988,7 +5988,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *pubsta, u8 tid) struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* only some cases are supported right now */ switch (sdata->vif.type) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bb4c7dd03758..6187cb54a876 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2411,7 +2411,7 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) struct sta_info *sta; /* add STAs back */ - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { enum ieee80211_sta_state state; @@ -2423,7 +2423,6 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) WARN_ON(drv_sta_state(local, sta->sdata, sta, state, state + 1)); } - mutex_unlock(&local->sta_mtx); } static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) @@ -2902,7 +2901,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * are active. This is really a workaround though. */ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { if (!local->resuming) @@ -2910,8 +2909,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } - - mutex_unlock(&local->sta_mtx); } /* -- cgit v1.2.3 From 2a8b665e6bcc3d554beb0d7cc1e4fd78dd94b55d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:02 +0200 Subject: wifi: mac80211: remove key_mtx We now hold the wiphy mutex everywhere that we use or needed the key_mtx, so we don't need this mutex any more. Remove it. Most of this change was done automatically with spatch. 
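The exact semantic patch is not reproduced in this commit message; purely as an illustration, rules along the following lines (the metavariable names E, L and R are placeholders, and the when-constraints a real rule would need are omitted) could express the core of the conversion, assuming the usual mutex_lock()/mutex_unlock() pairing around key operations:

  // illustrative sketch only, not the spatch actually used for this change
  @@
  expression E;
  @@
  - mutex_lock(&E->key_mtx);
  + lockdep_assert_wiphy(E->hw.wiphy);
    ...
  - mutex_unlock(&E->key_mtx);

  @@
  expression L, R;
  @@
  - key_mtx_dereference(L, R)
  + wiphy_dereference(L->hw.wiphy, R)

Assert-only sites (assert_key_lock() / lockdep_assert_held(&local->key_mtx)) map to lockdep_assert_wiphy() in the same way, as the diff below shows.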
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 34 ++++------- net/mac80211/debugfs_key.c | 20 +++---- net/mac80211/ieee80211_i.h | 6 -- net/mac80211/iface.c | 4 +- net/mac80211/key.c | 144 ++++++++++++++++++++------------------------- net/mac80211/key.h | 6 -- net/mac80211/link.c | 3 +- net/mac80211/main.c | 1 - net/mac80211/util.c | 4 +- 9 files changed, 90 insertions(+), 132 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 473de1882ded..261bed3bc000 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -452,13 +452,11 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, if (sta->ptk_idx == key_idx) return 0; - mutex_lock(&local->key_mtx); - key = key_mtx_dereference(local, sta->ptk[key_idx]); + key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); - mutex_unlock(&local->key_mtx); return ret; } @@ -599,30 +597,29 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, } if (pairwise && key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, - sta->ptk[key_idx]); + return wiphy_dereference(local->hw.wiphy, + sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - return rcu_dereference_check_key_mtx(local, - link_sta->gtk[key_idx]); + return wiphy_dereference(local->hw.wiphy, + link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, - sdata->keys[key_idx]); + return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); - key = rcu_dereference_check_key_mtx(local, link->gtk[key_idx]); + key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, sdata->keys[key_idx]); + return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } @@ -634,25 +631,16 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; - int ret; lockdep_assert_wiphy(local->hw.wiphy); - mutex_lock(&local->key_mtx); - key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); - if (!key) { - ret = -ENOENT; - goto out_unlock; - } + if (!key) + return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); - ret = 0; - out_unlock: - mutex_unlock(&local->key_mtx); - - return ret; + return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 16a04330e7dc..7e54da508765 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -4,7 +4,7 @@ * Copyright (c) 2006 Jiri Benc * Copyright 2007 Johannes Berg * Copyright (C) 2015 Intel Deutschland GmbH - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation */ #include @@ -378,14 +378,14 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - lockdep_assert_held(&sdata->local->key_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); debugfs_remove(sdata->debugfs.default_unicast_key); sdata->debugfs.default_unicast_key = NULL; if (sdata->default_unicast_key) { - key = 
key_mtx_dereference(sdata->local, - sdata->default_unicast_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->default_unicast_key); sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", @@ -396,8 +396,8 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sdata->debugfs.default_multicast_key = NULL; if (sdata->deflink.default_multicast_key) { - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_multicast_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_multicast_key); sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", @@ -413,8 +413,8 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_mgmt_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_mgmt_key); if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = @@ -442,8 +442,8 @@ ieee80211_debugfs_key_add_beacon_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_beacon_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_beacon_key); if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_beacon_key = diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bc921fcd52b..a0bbd7070974 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1466,12 +1466,6 @@ struct ieee80211_local { struct list_head mon_list; /* only that are IFF_UP && !cooked */ struct mutex iflist_mtx; - /* - * Key mutex, protects sdata's key_list and sta_info's - * key pointers and ptk_idx (write access, they're RCU.) 
- */ - struct mutex key_mtx; - /* mutex for scan and work locking */ struct mutex mtx; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index eb93caf0be87..385513315f8b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1245,6 +1245,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) int res; u32 hw_reconf_flags = 0; + lockdep_assert_wiphy(local->hw.wiphy); + switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: { struct ieee80211_sub_if_data *master; @@ -1271,10 +1273,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sizeof(sdata->vif.hw_queue)); sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; - mutex_lock(&local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; - mutex_unlock(&local->key_mtx); break; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index c5bbac4393ab..44053951a1da 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -53,11 +53,6 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -static void assert_key_lock(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->key_mtx); -} - static void update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { @@ -67,7 +62,7 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) return; /* crypto_tx_tailroom_needed_cnt is protected by this */ - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); @@ -98,7 +93,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); update_vlan_tailroom_need_count(sdata, 1); @@ -114,7 +109,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); @@ -129,6 +124,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(key->local->hw.wiphy); if (key->flags & KEY_FLAG_TAINTED) { /* If we get here, it's during resume and the key is @@ -151,8 +147,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!key->local->ops->set_key) goto out_unsupported; - assert_key_lock(key->local); - sta = key->sta; /* @@ -242,14 +236,14 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) if (!key || !key->local->ops->set_key) return; - assert_key_lock(key->local); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; sta = key->sta; sdata = key->sdata; + lockdep_assert_wiphy(key->local->hw.wiphy); + if (key->conf.link_id >= 0 && sdata->vif.active_links && !(sdata->vif.active_links & BIT(key->conf.link_id))) return; @@ -275,7 +269,7 @@ static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force) struct sta_info *sta = key->sta; struct ieee80211_local *local = key->local; - assert_key_lock(local); + lockdep_assert_wiphy(local->hw.wiphy); set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION); @@ -300,7 +294,7 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, struct sta_info *sta = new->sta; int i; - assert_key_lock(local); + lockdep_assert_wiphy(local->hw.wiphy); if (new->conf.flags & 
IEEE80211_KEY_FLAG_NO_AUTO_TX) { /* Extended Key ID key install, initial one or rekey */ @@ -358,12 +352,14 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= 0 && idx < NUM_DEFAULT_KEYS) { - key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->keys[idx]); if (!key) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); } if (uni) { @@ -382,9 +378,9 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link, void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_key(link, idx, uni, multi); - mutex_unlock(&link->sdata->local->key_mtx); } static void @@ -393,11 +389,12 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); rcu_assign_pointer(link->default_mgmt_key, key); @@ -407,9 +404,9 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_mgmt_key(link, idx); - mutex_unlock(&link->sdata->local->key_mtx); } static void @@ -418,12 +415,13 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); rcu_assign_pointer(link->default_beacon_key, key); @@ -433,9 +431,9 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_beacon_key(link, idx); - mutex_unlock(&link->sdata->local->key_mtx); } static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, @@ -452,6 +450,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, bool defunikey, defmultikey, defmgmtkey, defbeaconkey; bool is_wep; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* caller must provide at least one old/new */ if (WARN_ON(!new && !old)) return 0; @@ -510,12 +510,10 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) { + if (!new->local->wowlan) ret = ieee80211_key_enable_hw_accel(new); - } else { - 
assert_key_lock(new->local); + else new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - } } if (ret) @@ -541,17 +539,17 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ieee80211_check_fast_rx(sta); } else { defunikey = old && - old == key_mtx_dereference(sdata->local, - sdata->default_unicast_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + sdata->default_unicast_key); defmultikey = old && - old == key_mtx_dereference(sdata->local, - link->default_multicast_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_multicast_key); defmgmtkey = old && - old == key_mtx_dereference(sdata->local, - link->default_mgmt_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_mgmt_key); defbeaconkey = old && - old == key_mtx_dereference(sdata->local, - link->default_beacon_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_beacon_key); if (defunikey && !new) __ieee80211_set_default_key(link, -1, true, false); @@ -855,22 +853,24 @@ int ieee80211_key_link(struct ieee80211_key *key, * can cause warnings to appear. */ bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; - int ret = -EOPNOTSUPP; + int ret; - mutex_lock(&sdata->local->key_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (sta && pairwise) { struct ieee80211_key *alt_key; - old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]); - alt_key = key_mtx_dereference(sdata->local, sta->ptk[idx ^ 1]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + sta->ptk[idx]); + alt_key = wiphy_dereference(sdata->local->hw.wiphy, + sta->ptk[idx ^ 1]); /* The rekey code assumes that the old and new key are using * the same cipher. Enforce the assumption for pairwise keys. */ if ((alt_key && alt_key->conf.cipher != key->conf.cipher) || (old_key && old_key->conf.cipher != key->conf.cipher)) - goto out; + return -EOPNOTSUPP; } else if (sta) { struct link_sta_info *link_sta = &sta->deflink; int link_id = key->conf.link_id; @@ -878,26 +878,25 @@ int ieee80211_key_link(struct ieee80211_key *key, if (link_id >= 0) { link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sta->local->hw.wiphy->mtx)); - if (!link_sta) { - ret = -ENOLINK; - goto out; - } + if (!link_sta) + return -ENOLINK; } - old_key = key_mtx_dereference(sdata->local, link_sta->gtk[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + link_sta->gtk[idx]); } else { if (idx < NUM_DEFAULT_KEYS) - old_key = key_mtx_dereference(sdata->local, - sdata->keys[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->keys[idx]); if (!old_key) - old_key = key_mtx_dereference(sdata->local, - link->gtk[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); } /* Non-pairwise keys must also not switch the cipher on rekey */ if (!pairwise) { if (old_key && old_key->conf.cipher != key->conf.cipher) - goto out; + return -EOPNOTSUPP; } /* @@ -906,8 +905,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ieee80211_key_free_unused(key); - ret = 0; - goto out; + return 0; } key->local = sdata->local; @@ -931,9 +929,6 @@ int ieee80211_key_link(struct ieee80211_key *key, ieee80211_key_free(key, delay_tailroom); } - out: - mutex_unlock(&sdata->local->key_mtx); - return ret; } @@ -959,8 +954,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) lockdep_assert_wiphy(sdata->local->hw.wiphy); - mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt = 0; 
sdata->crypto_tx_tailroom_pending_dec = 0; @@ -977,8 +970,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_enable_hw_accel(key); } } - - mutex_unlock(&sdata->local->key_mtx); } void ieee80211_iter_keys(struct ieee80211_hw *hw, @@ -996,7 +987,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, lockdep_assert_wiphy(hw->wiphy); - mutex_lock(&local->key_mtx); if (vif) { sdata = vif_to_sdata(vif); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) @@ -1011,7 +1001,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, key->sta ? &key->sta->sta : NULL, &key->conf, iter_data); } - mutex_unlock(&local->key_mtx); } EXPORT_SYMBOL(ieee80211_iter_keys); @@ -1091,7 +1080,8 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_key *key, *tmp; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { if (key->conf.link_id != link->link_id) continue; @@ -1100,7 +1090,6 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link, key, NULL); list_add_tail(&key->list, keys); } - mutex_unlock(&local->key_mtx); } void ieee80211_free_key_list(struct ieee80211_local *local, @@ -1108,10 +1097,10 @@ void ieee80211_free_key_list(struct ieee80211_local *local, { struct ieee80211_key *key, *tmp; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(key, tmp, keys, list) __ieee80211_key_destroy(key, false); - mutex_unlock(&local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, @@ -1126,7 +1115,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->dec_tailroom_needed_wk); - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ieee80211_free_keys_iface(sdata, &keys); @@ -1159,8 +1148,6 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || vlan->crypto_tx_tailroom_pending_dec); } - - mutex_unlock(&local->key_mtx); } void ieee80211_free_sta_keys(struct ieee80211_local *local, @@ -1169,9 +1156,10 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, struct ieee80211_key *key; int i; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { - key = key_mtx_dereference(local, sta->deflink.gtk[i]); + key = wiphy_dereference(local->hw.wiphy, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, @@ -1182,7 +1170,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = key_mtx_dereference(local, sta->ptk[i]); + key = wiphy_dereference(local->hw.wiphy, sta->ptk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, @@ -1191,8 +1179,6 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } - - mutex_unlock(&local->key_mtx); } void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, @@ -1219,11 +1205,9 @@ void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, * within an ESS this usually won't happen. 
*/ - mutex_lock(&sdata->local->key_mtx); decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; - mutex_unlock(&sdata->local->key_mtx); } void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, @@ -1352,7 +1336,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) key = container_of(keyconf, struct ieee80211_key, conf); - assert_key_lock(key->local); + lockdep_assert_wiphy(key->local->hw.wiphy); /* * if key was uploaded, we assume the driver will/has remove(d) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 231a069d2975..1fa0f4f78962 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -168,12 +168,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, unsigned long del_links_mask, unsigned long add_links_mask); - -#define key_mtx_dereference(local, ref) \ - rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) -#define rcu_dereference_check_key_mtx(local, ref) \ - rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx))) - void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, struct wiphy_work *wk); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 71815b8d44af..226c852fd5ee 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -456,8 +456,8 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); + mutex_lock(&local->mtx); - mutex_lock(&local->key_mtx); old_active = sdata->vif.active_links; if (old_active & active_links) { /* @@ -473,7 +473,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) /* otherwise switch directly */ ret = _ieee80211_set_active_links(sdata, active_links); } - mutex_unlock(&local->key_mtx); mutex_unlock(&local->mtx); return ret; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 37714dcf9f06..a30eb5ca6369 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -806,7 +806,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, mutex_init(&local->iflist_mtx); mutex_init(&local->mtx); - mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6187cb54a876..279a65198885 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2987,6 +2987,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag) sdata = vif_to_sdata(vif); local = sdata->local; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(flag & IEEE80211_SDATA_DISCONNECT_RESUME && !local->resuming)) return; @@ -3000,10 +3002,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag) sdata->flags |= flag; - mutex_lock(&local->key_mtx); list_for_each_entry(key, &sdata->key_list, list) key->flags |= KEY_FLAG_TAINTED; - mutex_unlock(&local->key_mtx); } void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif) -- cgit v1.2.3 From 5435af6e6ac0132178b13d57ffc756dab5eef626 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:03 +0200 Subject: wifi: mac80211: remove chanctx_mtx We now hold the wiphy mutex everywhere that we use or needed the chanctx_mtx, so we don't need this mutex any more. Remove it. Most of this change was done automatically with spatch. 
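As with the key_mtx removal, the exact semantic patch is not included here; a sketch in the same spirit (illustration only, E is a placeholder metavariable) would be:

  // illustrative sketch only, not the spatch actually used for this change
  @@
  expression E;
  @@
  - lockdep_assert_held(&E->chanctx_mtx);
  + lockdep_assert_wiphy(E->hw.wiphy);

  @@
  expression E;
  @@
  - lockdep_is_held(&E->chanctx_mtx)
  + lockdep_is_held(&E->hw.wiphy->mtx)

Lock/unlock pairs around channel-context operations collapse to a single lockdep_assert_wiphy() in the same way as in the key_mtx conversion.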
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 18 +++---- net/mac80211/chan.c | 131 ++++++++++++++++++++------------------------- net/mac80211/ibss.c | 4 +- net/mac80211/ieee80211_i.h | 7 ++- net/mac80211/iface.c | 7 +-- net/mac80211/main.c | 1 - net/mac80211/mlme.c | 22 ++++---- net/mac80211/tdls.c | 19 ++++--- net/mac80211/util.c | 46 +++++++--------- 9 files changed, 113 insertions(+), 142 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 261bed3bc000..d27beaf8a8c4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -261,9 +261,9 @@ static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; - mutex_lock(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&sdata->local->chanctx_mtx); if (ret < 0) return ret; @@ -283,9 +283,9 @@ static int ieee80211_start_nan(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; - mutex_lock(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&sdata->local->chanctx_mtx); if (ret < 0) return ret; @@ -3619,7 +3619,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later @@ -3692,7 +3692,7 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) sdata_lock(sdata); mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!sdata->vif.bss_conf.csa_active) @@ -3704,7 +3704,6 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) ieee80211_csa_finalize(sdata); unlock: - mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -3864,6 +3863,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, sdata_assert_lock(sdata); lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3879,9 +3879,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.bss_conf.csa_active) return -EBUSY; - mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) { err = -EBUSY; goto out; @@ -3955,7 +3954,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } out: - mutex_unlock(&local->chanctx_mtx); return err; } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f967ed9d2a3a..31720c654b83 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -18,7 +18,7 @@ static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, struct ieee80211_link_data *link; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) num++; @@ -32,7 +32,7 @@ static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local, struct ieee80211_link_data *link; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) num++; @@ -52,7 +52,7 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) num++; @@ -62,7 +62,8 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local) static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local); } @@ -73,7 +74,7 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link) struct ieee80211_chanctx_conf *conf; conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) return NULL; @@ -87,7 +88,7 @@ ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, { struct ieee80211_link_data *link; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) { @@ -110,7 +111,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, { struct ieee80211_link_data *link; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) { @@ -136,7 +137,7 @@ ieee80211_chanctx_combined_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *compat) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat); if 
(!compat) @@ -154,7 +155,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *def) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (ieee80211_chanctx_combined_chandef(local, ctx, def)) return true; @@ -173,7 +174,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (mode == IEEE80211_CHANCTX_EXCLUSIVE) return NULL; @@ -361,7 +362,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, enum nl80211_chan_width max_bw; struct cfg80211_chan_def min_def; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* don't optimize non-20MHz based and radar_enabled confs */ if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 || @@ -537,7 +538,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (mode == IEEE80211_CHANCTX_EXCLUSIVE) return NULL; @@ -602,7 +603,7 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; bool required = false; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); lockdep_assert_held(&local->mtx); rcu_read_lock(); @@ -641,7 +642,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL); if (!ctx) @@ -666,7 +667,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, int err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; @@ -699,7 +700,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, int err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ctx = ieee80211_alloc_chanctx(local, chandef, mode); if (!ctx) @@ -718,7 +719,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, static void ieee80211_del_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->use_chanctx) { struct cfg80211_chan_def *chandef = &local->_oper_chandef; @@ -753,7 +754,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, static void ieee80211_free_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0); @@ -770,7 +771,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, const struct cfg80211_chan_def *compat = NULL; struct sta_info *sta; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -833,7 +834,7 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, { bool radar_enabled; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* for ieee80211_is_radar_required */ lockdep_assert_held(&local->mtx); @@ 
-865,7 +866,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, return -ENOTSUPP; conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { curr_ctx = container_of(conf, struct ieee80211_chanctx, conf); @@ -920,7 +921,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; u8 rx_chains_static, rx_chains_dynamic; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); rx_chains_static = 1; rx_chains_dynamic = 1; @@ -1032,7 +1033,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, * to a channel context that has already been freed. */ conf = rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); WARN_ON(!conf); if (clear) @@ -1056,11 +1057,9 @@ void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, { struct ieee80211_local *local = link->sdata->local; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); __ieee80211_link_copy_chanctx_to_vlans(link, clear); - - mutex_unlock(&local->chanctx_mtx); } int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) @@ -1068,7 +1067,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chanctx *ctx = link->reserved_chanctx; - lockdep_assert_held(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON(!ctx)) return -EINVAL; @@ -1108,7 +1107,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); curr_ctx = ieee80211_link_get_chanctx(link); if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx) @@ -1266,7 +1265,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) int err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1390,7 +1389,7 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chanctx *old_ctx, *new_ctx; - lockdep_assert_held(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1416,7 +1415,7 @@ static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL); if (WARN_ON(!chandef)) @@ -1438,7 +1437,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local, int i, err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL); if (!vif_chsw) @@ -1483,7 +1482,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local) int err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + 
lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) @@ -1524,7 +1523,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * If there are 2 independent pairs of channel contexts performing @@ -1783,10 +1782,10 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link) struct ieee80211_chanctx *ctx; bool use_reserved_switch = false; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); conf = rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) return; @@ -1829,7 +1828,7 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, return 0; } - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ret = cfg80211_chandef_dfs_required(local->hw.wiphy, chandef, @@ -1872,7 +1871,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, if (ret) link->radar_required = false; - mutex_unlock(&local->chanctx_mtx); return ret; } @@ -1885,7 +1883,7 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) int err; lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1948,51 +1946,40 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; const struct cfg80211_chan_def *compat; - int ret; + + lockdep_assert_wiphy(local->hw.wiphy); if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, IEEE80211_CHAN_DISABLED)) return -EINVAL; - mutex_lock(&local->chanctx_mtx); - if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) { - ret = 0; - goto out; - } + if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) + return 0; if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || - link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) { - ret = -EINVAL; - goto out; - } + link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + return -EINVAL; conf = rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (!conf) { - ret = -EINVAL; - goto out; - } + lockdep_is_held(&local->hw.wiphy->mtx)); + if (!conf) + return -EINVAL; ctx = container_of(conf, struct ieee80211_chanctx, conf); compat = cfg80211_chandef_compatible(&conf->def, chandef); - if (!compat) { - ret = -EINVAL; - goto out; - } + if (!compat) + return -EINVAL; switch (ctx->replace_state) { case IEEE80211_CHANCTX_REPLACE_NONE: - if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) { - ret = -EBUSY; - goto out; - } + if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) + return -EBUSY; break; case IEEE80211_CHANCTX_WILL_BE_REPLACED: /* TODO: Perhaps the bandwidth change could be treated as a * reservation itself? 
*/ - ret = -EBUSY; - goto out; + return -EBUSY; case IEEE80211_CHANCTX_REPLACES_OTHER: /* channel context that is going to replace another channel * context doesn't really exist and shouldn't be assigned @@ -2006,22 +1993,19 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, ieee80211_recalc_chanctx_chantype(local, ctx); *changed |= BSS_CHANGED_BANDWIDTH; - ret = 0; - out: - mutex_unlock(&local->chanctx_mtx); - return ret; + return 0; } void ieee80211_link_release_channel(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; - mutex_lock(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (rcu_access_pointer(link->conf->chanctx_conf)) { lockdep_assert_held(&sdata->local->mtx); __ieee80211_link_release_channel(link); } - mutex_unlock(&sdata->local->chanctx_mtx); } void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) @@ -2034,20 +2018,19 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *ap; struct ieee80211_chanctx_conf *conf; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->bss)) return; ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - mutex_lock(&local->chanctx_mtx); - rcu_read_lock(); ap_conf = rcu_dereference(ap->vif.link_conf[link_id]); conf = rcu_dereference_protected(ap_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); rcu_assign_pointer(link_conf->chanctx_conf, conf); rcu_read_unlock(); - mutex_unlock(&local->chanctx_mtx); } void ieee80211_iter_chan_contexts_atomic( diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d68650cbd5ff..bbda2764fa02 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1765,6 +1765,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int i; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ return -EOPNOTSUPP; @@ -1785,10 +1787,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, chanmode = (params->channel_fixed && !ret) ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; - mutex_lock(&local->chanctx_mtx); ret = ieee80211_check_combinations(sdata, ¶ms->chandef, chanmode, radar_detect_width); - mutex_unlock(&local->chanctx_mtx); if (ret < 0) return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a0bbd7070974..cc671b471542 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -979,8 +979,8 @@ struct ieee80211_link_data { struct ieee80211_sub_if_data *sdata; unsigned int link_id; - struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ - struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ + struct list_head assigned_chanctx_list; /* protected by wiphy mutex */ + struct list_head reserved_chanctx_list; /* protected by wiphy mutex */ /* multicast keys only */ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + @@ -1001,7 +1001,7 @@ struct ieee80211_link_data { struct delayed_work color_collision_detect_work; u64 color_bitmap; - /* context reservation -- protected with chanctx_mtx */ + /* context reservation -- protected with wiphy mutex */ struct ieee80211_chanctx *reserved_chanctx; struct cfg80211_chan_def reserved_chandef; bool reserved_radar_required; @@ -1499,7 +1499,6 @@ struct ieee80211_local { /* channel contexts */ struct list_head chanctx_list; - struct mutex chanctx_mtx; #ifdef CONFIG_MAC80211_LEDS struct led_trigger tx_led, rx_led, assoc_led, radio_led; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 385513315f8b..97733520d6b3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -324,9 +324,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *nsdata; - int ret; ASSERT_RTNL(); + lockdep_assert_wiphy(local->hw.wiphy); /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { @@ -391,10 +391,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, } } - mutex_lock(&local->chanctx_mtx); - ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&local->chanctx_mtx); - return ret; + return ieee80211_check_combinations(sdata, NULL, 0, 0); } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a30eb5ca6369..0f38b5df53b3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -826,7 +826,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->handle_wake_tx_queue_lock); INIT_LIST_HEAD(&local->chanctx_list); - mutex_init(&local->chanctx_mtx); wiphy_delayed_work_init(&local->scan_work, ieee80211_scan_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4296168877e8..605407e4b67f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1691,7 +1691,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, sdata_lock(sdata); mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) goto out; @@ -1743,7 +1743,6 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, ieee80211_sta_reset_conn_monitor(sdata); out: - mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -1813,14 +1812,14 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; + 
lockdep_assert_wiphy(local->hw.wiphy); + if (!local->ops->abort_channel_switch) return; mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); ieee80211_link_unreserve_chanctx(link); - mutex_unlock(&local->chanctx_mtx); if (link->csa_block_tx) ieee80211_wake_vif_queues(local, sdata, @@ -1854,6 +1853,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, int res; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (!cbss) return; @@ -1936,9 +1936,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, ieee80211_teardown_tdls_peers(sdata); mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) { sdata_info(sdata, "no channel context assigned to vif?, disconnecting\n"); @@ -1968,7 +1967,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, res); goto drop_connection; } - mutex_unlock(&local->chanctx_mtx); link->conf->csa_active = true; link->csa_chandef = csa_ie.chandef; @@ -2000,7 +1998,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, return; lock_and_drop_connection: mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); drop_connection: /* * This is just so that the disconnect flow will know that @@ -2014,7 +2011,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); mutex_unlock(&local->mtx); } @@ -3186,6 +3182,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) return; @@ -3207,7 +3205,6 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_send_count++; if (dst) { - lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, dst); if (!WARN_ON(!sta)) ieee80211_check_fast_rx(sta); @@ -3629,6 +3626,8 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) const u8 *ap_addr = ifmgd->auth_data->ap_addr; struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; @@ -3636,7 +3635,6 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) run_again(sdata, ifmgd->auth_data->timeout); /* move station state to auth */ - lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, ap_addr); if (!sta) { WARN_ONCE(1, "%s: STA %pM not found", sdata->name, ap_addr); @@ -5899,6 +5897,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, }; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); @@ -6133,7 +6132,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); - lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); if (WARN_ON(!sta)) { goto free; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index e7bf25e3be07..d6d3123f4e3a 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1354,9 +1354,10 @@ static void 
iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, enum nl80211_chan_width width; struct ieee80211_supported_band *sband; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { width = conf->def.width; sband = local->hw.wiphy->bands[conf->def.chan->band]; @@ -1384,7 +1385,6 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, } } - mutex_unlock(&local->chanctx_mtx); } static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata) @@ -1447,6 +1447,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = sdata->local; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -ENOTSUPP; @@ -1479,7 +1481,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } - lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, peer); if (!sta) { ret = -ENOLINK; @@ -1512,7 +1513,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, ret = sta_info_destroy_addr(sdata, peer); - lockdep_assert_wiphy(local->hw.wiphy); iee80211_tdls_recalc_ht_protection(sdata, NULL); iee80211_tdls_recalc_chanctx(sdata, NULL); @@ -1667,11 +1667,12 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, u32 ch_sw_tm_ie; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (chandef->chan->freq_offset) /* this may work, but is untested */ return -EOPNOTSUPP; - lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, @@ -1793,6 +1794,8 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_ch_sw_params params = {}; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + params.action_code = WLAN_TDLS_CHANNEL_SWITCH_RESPONSE; params.timestamp = rx_status->device_timestamp; @@ -1802,7 +1805,6 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", @@ -1890,6 +1892,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_ch_sw_params params = {}; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + params.action_code = WLAN_TDLS_CHANNEL_SWITCH_REQUEST; params.timestamp = rx_status->device_timestamp; @@ -1978,7 +1982,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, goto free; } - lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 279a65198885..dce8c2043096 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2351,6 +2351,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct ieee80211_chanctx *ctx; + lockdep_assert_wiphy(local->hw.wiphy); + /* * We get here if during resume the device can't be restarted properly. 
* We might also get here if this happens during HW reset, which is a @@ -2379,10 +2381,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) /* Mark channel contexts as not being in the driver any more to avoid * removing them from the driver during the shutdown process... */ - mutex_lock(&local->chanctx_mtx); list_for_each_entry(ctx, &local->chanctx_list, list) ctx->driver_present = false; - mutex_unlock(&local->chanctx_mtx); } static void ieee80211_assign_chanctx(struct ieee80211_local *local, @@ -2392,17 +2392,17 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + lockdep_assert_wiphy(local->hw.wiphy); + if (!local->use_chanctx) return; - mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { ctx = container_of(conf, struct ieee80211_chanctx, conf); drv_assign_vif_chanctx(local, sdata, link->conf, ctx); } - mutex_unlock(&local->chanctx_mtx); } static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) @@ -2410,8 +2410,9 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; - /* add STAs back */ lockdep_assert_wiphy(local->hw.wiphy); + + /* add STAs back */ list_for_each_entry(sta, &local->sta_list, list) { enum ieee80211_sta_state state; @@ -2509,6 +2510,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) bool suspended = local->suspended; bool in_reconfig = false; + lockdep_assert_wiphy(local->hw.wiphy); + /* nothing to do if HW shouldn't run */ if (!local->open_count) goto wake_up; @@ -2624,12 +2627,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add channel contexts */ if (local->use_chanctx) { - mutex_lock(&local->chanctx_mtx); list_for_each_entry(ctx, &local->chanctx_list, list) if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) WARN_ON(drv_add_chanctx(local, ctx)); - mutex_unlock(&local->chanctx_mtx); sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); @@ -2901,8 +2902,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) * are active. This is really a workaround though. */ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { - lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(sta, &local->sta_list, list) { if (!local->resuming) ieee80211_sta_tear_down_BA_sessions( @@ -3025,10 +3024,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_chanctx *chanctx; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); chanctx_conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* * This function can be called from a work, thus it may be possible @@ -3037,12 +3036,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata, * So nothing should be done in such case. 
*/ if (!chanctx_conf) - goto unlock; + return; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); ieee80211_recalc_smps_chanctx(local, chanctx); - unlock: - mutex_unlock(&local->chanctx_mtx); } void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, @@ -3053,7 +3050,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *chanctx; int i; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); for (i = 0; i < ARRAY_SIZE(sdata->vif.link_conf); i++) { struct ieee80211_bss_conf *bss_conf; @@ -3069,9 +3066,9 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, } chanctx_conf = rcu_dereference_protected(bss_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* - * Since we hold the chanctx_mtx (checked above) + * Since we hold the wiphy mutex (checked above) * we can take the chanctx_conf pointer out of the * RCU critical section, it cannot go away without * the mutex. Just the way we reached it could - in @@ -3081,14 +3078,12 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (!chanctx_conf) - goto unlock; + return; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); ieee80211_recalc_chanctx_min_def(local, chanctx, NULL); } - unlock: - mutex_unlock(&local->chanctx_mtx); } size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) @@ -4364,7 +4359,7 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_chanctx *ctx; int num_chanctx = 0; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; @@ -4372,7 +4367,6 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, num_chanctx++; chandef = ctx->conf.def; } - mutex_unlock(&local->chanctx_mtx); ieee80211_dfs_cac_cancel(local); @@ -4773,7 +4767,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, struct ieee80211_link_data *link; u8 radar_detect = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)) return 0; @@ -4814,7 +4808,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, .radar_detect = radar_detect, }; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; @@ -4904,7 +4898,7 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) int err; struct iface_combination_params params = {0}; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) -- cgit v1.2.3 From 463559b7c3fe5fab1a4b60cd3454ef84a5dc51b8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:04 +0200 Subject: wifi: mac80211: remove ampdu_mlme.mtx We now hold the wiphy mutex everywhere that we use or needed the A-MPDU locking, so we don't need this mutex any more. Remove it. Most of this change was done automatically with spatch. 
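For readers following along, here is a minimal, self-contained user-space sketch of the shape of this conversion (this is not mac80211 code; the mutex, assert_wiphy_held() and stop_ba_session_locked() below are made-up stand-ins): the old pattern of a locking wrapper __foo() around a lock-assuming worker ___foo() collapses into a single function that only asserts the caller already holds the one outer lock, with a pthread mutex standing in for the wiphy mutex.

/*
 * Minimal user-space model of the pattern (not mac80211 code): a pthread
 * mutex stands in for the wiphy mutex, assert_wiphy_held() approximates
 * lockdep_assert_wiphy() (it only checks that the mutex is locked, not
 * which thread owns it), and stop_ba_session_locked() plays the role of
 * the former ___ieee80211_stop_*_ba_session() workers.
 */
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wiphy_mtx = PTHREAD_MUTEX_INITIALIZER;

static void assert_wiphy_held(void)
{
	/* trylock on an already-locked, non-recursive mutex returns EBUSY */
	assert(pthread_mutex_trylock(&wiphy_mtx) == EBUSY);
}

/* after the conversion: no locking wrapper, only the assertion */
static void stop_ba_session_locked(int tid)
{
	assert_wiphy_held();
	printf("stopping BA session on TID %d\n", tid);
}

int main(void)
{
	int tid;

	/* callers take the outer lock once around the whole operation */
	pthread_mutex_lock(&wiphy_mtx);
	for (tid = 0; tid < 16; tid++)
		stop_ba_session_locked(tid);
	pthread_mutex_unlock(&wiphy_mtx);

	return 0;
}

The point of the sketch: once one coarse lock is taken at the entry points, the per-object mutex and its take/release wrappers can be deleted outright, and misuse shows up as an assertion failure (a lockdep splat in the kernel) rather than a deadlock or a race.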
Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 51 +++++++++++++--------------------------------- net/mac80211/agg-tx.c | 49 +++++++++++++++----------------------------- net/mac80211/ht.c | 38 ++++++++++++++++------------------ net/mac80211/ieee80211_i.h | 14 +++++-------- net/mac80211/key.c | 6 ++---- net/mac80211/sta_info.c | 1 - net/mac80211/sta_info.h | 18 ++++++---------- 7 files changed, 60 insertions(+), 117 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a686f1ce66cb..9bffac7a4974 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -55,8 +55,8 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) kfree(tid_rx); } -void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool tx) +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason, bool tx) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; @@ -69,10 +69,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, .ssn = 0, }; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid], - lockdep_is_held(&sta->ampdu_mlme.mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid)) return; @@ -114,14 +114,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } -void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool tx) -{ - mutex_lock(&sta->ampdu_mlme.mtx); - ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx); - mutex_unlock(&sta->ampdu_mlme.mtx); -} - void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, const u8 *addr) { @@ -250,11 +242,11 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, ieee80211_tx_skb(sdata, skb); } -void ___ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext) +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -270,6 +262,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u16 status = WLAN_STATUS_REQUEST_DECLINED; u16 max_buf_size; + lockdep_assert_wiphy(sta->local->hw.wiphy); + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { ht_dbg(sta->sdata, "STA %pM requests BA session on unsupported tid %d\n", @@ -325,9 +319,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", buf_size, sta->sta.addr); - /* examine state machine */ - lockdep_assert_held(&sta->ampdu_mlme.mtx); - if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { struct tid_ampdu_rx *tid_rx; @@ -355,9 +346,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, sta->sta.addr, tid); /* delete existing Rx BA session on the same tid */ - ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - WLAN_STATUS_UNSPECIFIED_QOS, - false); + __ieee80211_stop_rx_ba_session(sta, tid, 
WLAN_BACK_RECIPIENT, + WLAN_STATUS_UNSPECIFIED_QOS, + false); } if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) { @@ -444,20 +435,6 @@ end: timeout, addbaext); } -static void __ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, - u16 tid, u16 buf_size, bool tx, - bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext) -{ - mutex_lock(&sta->ampdu_mlme.mtx); - ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, - start_seq_num, ba_policy, tid, - buf_size, tx, auto_seq, addbaext); - mutex_unlock(&sta->ampdu_mlme.mtx); -} - void ieee80211_process_addba_request(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 3da0c55f13e2..0627abb09f0e 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -142,7 +142,7 @@ EXPORT_SYMBOL(ieee80211_send_bar); void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) { - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } @@ -213,7 +213,7 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); if (!txq) return; @@ -271,7 +271,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -296,8 +296,8 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) kfree_rcu(tid_tx, rcu_head); } -int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason) +int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; @@ -311,7 +311,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, }; int ret; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); switch (reason) { case AGG_STOP_DECLINED: @@ -461,7 +461,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))) return; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); @@ -764,7 +764,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, .ssn = 0, }; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); params.buf_size = tid_tx->buf_size; @@ -801,7 +801,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) return; @@ -868,20 +868,6 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, } 
EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); -int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason) -{ - int ret; - - mutex_lock(&sta->ampdu_mlme.mtx); - - ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); - - mutex_unlock(&sta->ampdu_mlme.mtx); - - return ret; -} - int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); @@ -993,6 +979,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, u16 capab, tid, buf_size; bool amsdu; + lockdep_assert_wiphy(sta->local->hw.wiphy); + capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); @@ -1003,16 +991,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (!amsdu && txq) set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); - mutex_lock(&sta->ampdu_mlme.mtx); - tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) - goto out; + return; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", sta->sta.addr, tid); - goto out; + return; } del_timer_sync(&tid_tx->addba_resp_timer); @@ -1030,7 +1016,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, ht_dbg(sta->sdata, "got addBA resp for %pM tid %d but we already gave up\n", sta->sta.addr, tid); - goto out; + return; } /* @@ -1044,7 +1030,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ - goto out; + return; } tid_tx->buf_size = buf_size; @@ -1065,9 +1051,6 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } - - out: - mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e8feed05528a..68cea2685224 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -316,16 +316,16 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; - mutex_lock(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, - reason != AGG_STOP_DESTROY_STA && - reason != AGG_STOP_PEER_REQUEST); + __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_tx_ba_session(sta, i, reason); - mutex_unlock(&sta->ampdu_mlme.mtx); + __ieee80211_stop_tx_ba_session(sta, i, reason); /* * In case the tear down is part of a reconfigure due to HW restart @@ -335,7 +335,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, if(reason == AGG_STOP_DESTROY_STA) { wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work); - mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { struct tid_ampdu_tx *tid_tx = rcu_dereference_protected_tid_tx(sta, i); @@ -346,7 +345,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, i, tid_tx); } - mutex_unlock(&sta->ampdu_mlme.mtx); 
} } @@ -358,32 +356,33 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) bool blocked; int tid; + lockdep_assert_wiphy(sta->local->hw.wiphy); + /* When this flag is set, new sessions should be blocked. */ blocked = test_sta_flag(sta, WLAN_STA_BLOCK_BA); - mutex_lock(&sta->ampdu_mlme.mtx); for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT, true); if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_stop_requested)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); if (!blocked && test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) - ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, - IEEE80211_MAX_AMPDU_BUF_HT, - false, true, NULL); + __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, + IEEE80211_MAX_AMPDU_BUF_HT, + false, true, NULL); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, 0, false); @@ -414,8 +413,6 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) */ synchronize_net(); - mutex_unlock(&sta->ampdu_mlme.mtx); - wiphy_work_queue(sdata->local->hw.wiphy, work); return; } @@ -448,12 +445,11 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state)) ieee80211_start_tx_ba_cb(sta, tid, tid_tx); if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) - ___ieee80211_stop_tx_ba_session(sta, tid, - AGG_STOP_LOCAL_REQUEST); + __ieee80211_stop_tx_ba_session(sta, tid, + AGG_STOP_LOCAL_REQUEST); if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, tid, tid_tx); } - mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cc671b471542..7a8f950644ff 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2097,15 +2097,13 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old, enum ieee80211_smps_mode smps_mode_new); -void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ___ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext); +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -2122,8 +2120,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason); -int 
___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 44053951a1da..ac410f6632b5 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -311,11 +311,9 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, * job done for the few ms we need it.) */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_tx_ba_session(sta, i, - AGG_STOP_LOCAL_REQUEST); - mutex_unlock(&sta->ampdu_mlme.mtx); + __ieee80211_stop_tx_ba_session(sta, i, + AGG_STOP_LOCAL_REQUEST); } } else if (old) { /* Rekey without Extended Key ID. diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eee541251c48..abcc280acd38 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -554,7 +554,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); - mutex_init(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c3cce280bc0f..aaf45d8523a1 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -259,9 +259,6 @@ struct tid_ampdu_rx { /** * struct sta_ampdu_mlme - STA aggregation information. * - * @mtx: mutex to protect all TX data (except non-NULL assignments - * to tid_tx[idx], which are protected by the sta spinlock) - * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the @@ -275,13 +272,13 @@ struct tid_ampdu_rx { * unexpected aggregation related frames outside a session * @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID - * @tid_start_tx: sessions where start was requested + * @tid_start_tx: sessions where start was requested, not just protected + * by wiphy mutex but also sta->lock * @last_addba_req_time: timestamp of the last addBA request. * @addba_req_num: number of times addBA request has been sent. 
* @dialog_token_allocator: dialog token enumerator for each new session; */ struct sta_ampdu_mlme { - struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; u8 tid_rx_token[IEEE80211_NUM_TIDS]; @@ -796,13 +793,10 @@ static inline void sta_info_pre_move_state(struct sta_info *sta, void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); -static inline struct tid_ampdu_tx * -rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) -{ - return rcu_dereference_protected(sta->ampdu_mlme.tid_tx[tid], - lockdep_is_held(&sta->lock) || - lockdep_is_held(&sta->ampdu_mlme.mtx)); -} +#define rcu_dereference_protected_tid_tx(sta, tid) \ + rcu_dereference_protected((sta)->ampdu_mlme.tid_tx[tid], \ + lockdep_is_held(&(sta)->lock) || \ + lockdep_is_held(&(sta)->local->hw.wiphy->mtx)); /* Maximum number of frames to buffer per power saving station per AC */ #define STA_MAX_TX_BUFFER 64 -- cgit v1.2.3 From 0cd8080e46b834fa72026112488ab61d4b82f03a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:05 +0200 Subject: wifi: mac80211: remove local->mtx We now hold the wiphy mutex everywhere that we use or needed the local->mtx, so we don't need this mutex any more. Remove it. Most of this change was done automatically with spatch. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 72 +++++++++++------------------- net/mac80211/chan.c | 23 ++-------- net/mac80211/ibss.c | 10 ++--- net/mac80211/ieee80211_i.h | 3 -- net/mac80211/iface.c | 32 +++----------- net/mac80211/link.c | 2 - net/mac80211/main.c | 3 -- net/mac80211/mlme.c | 65 ++++++++++----------------- net/mac80211/ocb.c | 8 ++-- net/mac80211/offchannel.c | 48 ++++++++------------ net/mac80211/scan.c | 107 +++++++++++++++++---------------------------- net/mac80211/tdls.c | 12 ++--- net/mac80211/tx.c | 8 ++-- net/mac80211/util.c | 11 ++--- 14 files changed, 135 insertions(+), 269 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d27beaf8a8c4..dada3ca0b9e0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -887,6 +887,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; @@ -895,21 +897,17 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, local->monitor_sdata); if (sdata) { sdata_lock(sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, chandef, IEEE80211_CHANCTX_EXCLUSIVE); - mutex_unlock(&local->mtx); sdata_unlock(sdata); } } else { - mutex_lock(&local->mtx); if (local->open_count == local->monitors) { local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } - mutex_unlock(&local->mtx); } if (ret == 0) @@ -1252,6 +1250,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; + lockdep_assert_wiphy(local->hw.wiphy); + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; @@ -1361,12 +1361,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return err; } - mutex_lock(&local->mtx); err = ieee80211_link_use_channel(link, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); - mutex_unlock(&local->mtx); if (err) { link_conf->beacon_int = 
prev_beacon_int; return err; @@ -1477,9 +1475,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return 0; error: - mutex_lock(&local->mtx); ieee80211_link_release_channel(link); - mutex_unlock(&local->mtx); return err; } @@ -1554,6 +1550,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_bss_conf *link_conf = link->conf; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) @@ -1567,7 +1564,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, sdata); /* abort any running channel switch or color change */ - mutex_lock(&local->mtx); link_conf->csa_active = false; link_conf->color_change_active = false; if (link->csa_block_tx) { @@ -1576,8 +1572,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link->csa_block_tx = false; } - mutex_unlock(&local->mtx); - ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ @@ -1632,10 +1626,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); - mutex_lock(&local->mtx); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); - mutex_unlock(&local->mtx); return 0; } @@ -2601,6 +2593,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) @@ -2612,10 +2606,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - mutex_lock(&sdata->local->mtx); err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -2626,11 +2618,11 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ieee80211_stop_mesh(sdata); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); - mutex_unlock(&sdata->local->mtx); return 0; } @@ -3376,7 +3368,8 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; int err; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!list_empty(&local->roc_list) || local->scanning) { err = -EBUSY; goto out_unlock; @@ -3395,7 +3388,6 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, msecs_to_jiffies(cac_time_ms)); out_unlock: - mutex_unlock(&local->mtx); return err; } @@ -3405,7 +3397,8 @@ static void ieee80211_end_cac(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started @@ -3419,7 +3412,6 @@ static void ieee80211_end_cac(struct wiphy *wiphy, sdata->wdev.cac_started = false; 
} } - mutex_unlock(&local->mtx); } static struct cfg80211_beacon_data * @@ -3618,7 +3610,6 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) int err; sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); /* @@ -3691,7 +3682,6 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_local *local = sdata->local; sdata_lock(sdata); - mutex_lock(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ @@ -3704,7 +3694,6 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) ieee80211_csa_finalize(sdata); unlock: - mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -3862,7 +3851,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, int err; sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) @@ -3962,18 +3950,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int err; - mutex_lock(&local->mtx); - err = __ieee80211_channel_switch(wiphy, dev, params); - mutex_unlock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - return err; + return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; @@ -4102,7 +4087,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, int ret; /* the lock is needed to assign the cookie later */ - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); @@ -4173,7 +4158,6 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, ret = 0; unlock: rcu_read_unlock(); - mutex_unlock(&local->mtx); return ret; } @@ -4682,7 +4666,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) int err; sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); sdata->vif.bss_conf.color_change_active = false; @@ -4709,7 +4693,7 @@ void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; sdata_lock(sdata); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!sdata->vif.bss_conf.color_change_active) @@ -4721,7 +4705,6 @@ void ieee80211_color_change_finalize_work(struct wiphy *wiphy, ieee80211_color_change_finalize(sdata); unlock: - mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -4780,12 +4763,11 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, int err; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.bss_conf.nontransmitted) return -EINVAL; - mutex_lock(&local->mtx); - /* don't allow another color change if one is already active or if csa * is active */ @@ -4810,7 +4792,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, ieee80211_color_change_finalize(sdata); out: - mutex_unlock(&local->mtx); return err; } @@ -4832,16 +4813,13 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - int res; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; - mutex_lock(&sdata->local->mtx); - res = ieee80211_vif_set_links(sdata, wdev->valid_links, 0); - mutex_unlock(&sdata->local->mtx); - - return res; + return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, @@ -4850,9 +4828,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ieee80211_vif_set_links(sdata, wdev->valid_links, 0); - mutex_unlock(&sdata->local->mtx); } static int sta_add_link_station(struct ieee80211_local *local, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 31720c654b83..1d928f29ad6f 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -573,7 +573,7 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -604,7 +604,6 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, bool required = false; lockdep_assert_wiphy(local->hw.wiphy); - lockdep_assert_held(&local->mtx); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -666,7 +665,6 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, u32 changed; int err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); if (!local->use_chanctx) @@ -699,7 +697,6 @@ ieee80211_new_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx; int err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); ctx = ieee80211_alloc_chanctx(local, chandef, mode); @@ -835,8 +832,6 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, bool radar_enabled; lockdep_assert_wiphy(local->hw.wiphy); - /* for ieee80211_is_radar_required */ - lockdep_assert_held(&local->mtx); radar_enabled = ieee80211_chanctx_radar_required(local, chanctx); @@ -1024,7 +1019,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) return; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* Check that conf exists, even when clearing this function * must be called with the AP's channel context still there @@ -1264,7 +1259,6 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) 
u64 changed = 0; int err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; @@ -1414,7 +1408,6 @@ static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local, { const struct cfg80211_chan_def *chandef; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL); @@ -1436,7 +1429,6 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, *old_ctx; int i, err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL); @@ -1481,7 +1473,6 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; int err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { @@ -1522,7 +1513,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) int err, n_assigned, n_reserved, n_ready; int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); /* @@ -1820,7 +1810,7 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, u8 radar_detect_width = 0; int ret; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.active_links && !(sdata->vif.active_links & BIT(link->link_id))) { @@ -1828,8 +1818,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, return 0; } - lockdep_assert_wiphy(local->hw.wiphy); - ret = cfg80211_chandef_dfs_required(local->hw.wiphy, chandef, sdata->wdev.iftype); @@ -1882,7 +1870,6 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) struct ieee80211_chanctx *old_ctx; int err; - lockdep_assert_held(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; @@ -2002,10 +1989,8 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link) lockdep_assert_wiphy(sdata->local->hw.wiphy); - if (rcu_access_pointer(link->conf->chanctx_conf)) { - lockdep_assert_held(&sdata->local->mtx); + if (rcu_access_pointer(link->conf->chanctx_conf)) __ieee80211_link_release_channel(link); - } } void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bbda2764fa02..3c6370377234 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -236,6 +236,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, int err; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); @@ -299,17 +300,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, radar_required = err; - mutex_lock(&local->mtx); if (ieee80211_link_use_channel(&sdata->deflink, &chandef, ifibss->fixed_channel ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { sdata_info(sdata, "Failed to join IBSS, no channel context\n"); - mutex_unlock(&local->mtx); return; } sdata->deflink.radar_required = radar_required; - mutex_unlock(&local->mtx); memcpy(ifibss->bssid, bssid, ETH_ALEN); @@ -367,9 +365,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.cfg.ssid_len = 0; RCU_INIT_POINTER(ifibss->presp, NULL); kfree_rcu(presp, rcu_head); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", err); return; @@ -680,6 +676,8 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) struct beacon_data *presp; struct sta_info *sta; + lockdep_assert_wiphy(local->hw.wiphy); + if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, @@ -726,9 +724,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); drv_leave_ibss(local, sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); } static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7a8f950644ff..2d2a4445714e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1466,9 +1466,6 @@ struct ieee80211_local { struct list_head mon_list; /* only that are IFF_UP && !cooked */ struct mutex iflist_mtx; - /* mutex for scan and work locking */ - struct mutex mtx; - /* Scanning and BSS list */ unsigned long scanning; struct cfg80211_ssid scan_ssid; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 97733520d6b3..9724a3d4545b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -110,7 +110,7 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, bool working, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); active = force_active || !list_empty(&local->chanctx_list) || @@ -207,6 +207,8 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata struct ieee80211_sub_if_data *scan_sdata; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + /* To be the most flexible here we want to only limit changing the * address if the specific interface is doing offchannel work or * scanning. 
@@ -214,8 +216,6 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata if (netif_carrier_ok(sdata->dev)) return -EBUSY; - mutex_lock(&local->mtx); - /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) @@ -230,7 +230,7 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata /* And if this iface is scanning */ if (local->scanning) { scan_sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sdata == scan_sdata) ret = -EBUSY; } @@ -247,7 +247,6 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata } unlock: - mutex_unlock(&local->mtx); return ret; } @@ -464,6 +463,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do bool cancel_scan; struct cfg80211_nan_func *func; + lockdep_assert_wiphy(local->hw.wiphy); + clear_bit(SDATA_STATE_RUNNING, &sdata->state); synchronize_rcu(); /* flush _ieee80211_wake_txqs() */ @@ -534,7 +535,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do "destroying interface with valid links 0x%04x\n", sdata->vif.valid_links); - mutex_lock(&local->mtx); sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa_waiting_bcn = false; @@ -543,7 +543,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); sdata_unlock(sdata); wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); @@ -555,9 +554,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; WARN_ON(local->suspended); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -585,9 +582,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); - mutex_unlock(&local->mtx); RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL); /* see comment in the default case below */ ieee80211_free_keys(sdata, true); @@ -685,9 +680,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (local->monitors == 0) ieee80211_del_virtual_monitor(local); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; @@ -1169,10 +1162,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) mutex_unlock(&local->iflist_mtx); sdata_lock(sdata); - mutex_lock(&local->mtx); ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); - mutex_unlock(&local->mtx); sdata_unlock(sdata); if (ret) { mutex_lock(&local->iflist_mtx); @@ -1217,9 +1208,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); sdata_lock(sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); sdata_unlock(sdata); drv_remove_interface(local, sdata); @@ -1251,9 +1240,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if 
(!sdata->bss) return -ENOLINK; - mutex_lock(&local->mtx); list_add(&sdata->u.vlan.list, &sdata->bss->vlans); - mutex_unlock(&local->mtx); master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -1362,9 +1349,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); ieee80211_recalc_offload(local); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); netif_carrier_on(dev); break; @@ -1469,11 +1454,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_stop(local); err_del_bss: sdata->bss = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { - mutex_lock(&local->mtx); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); - mutex_unlock(&local->mtx); - } /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 226c852fd5ee..80571dcc57f5 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -457,7 +457,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); - mutex_lock(&local->mtx); old_active = sdata->vif.active_links; if (old_active & active_links) { /* @@ -473,7 +472,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) /* otherwise switch directly */ ret = _ieee80211_set_active_links(sdata, active_links); } - mutex_unlock(&local->mtx); return ret; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0f38b5df53b3..411e44239bb9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -804,8 +804,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, __hw_addr_init(&local->mc_list); mutex_init(&local->iflist_mtx); - mutex_init(&local->mtx); - spin_lock_init(&local->filter_lock); spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); @@ -1539,7 +1537,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) enum nl80211_band band; mutex_destroy(&local->iflist_mtx); - mutex_destroy(&local->mtx); if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 605407e4b67f..43bf2f409000 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1690,7 +1690,6 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, return; sdata_lock(sdata); - mutex_lock(&local->mtx); lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) @@ -1743,7 +1742,6 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, ieee80211_sta_reset_conn_monitor(sdata); out: - mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -1817,8 +1815,6 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) if (!local->ops->abort_channel_switch) return; - mutex_lock(&local->mtx); - ieee80211_link_unreserve_chanctx(link); if (link->csa_block_tx) @@ -1828,8 +1824,6 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) link->csa_block_tx = false; link->conf->csa_active = false; - mutex_unlock(&local->mtx); - drv_abort_channel_switch(sdata); } @@ -1875,7 +1869,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } if (res < 0) - goto lock_and_drop_connection; + goto drop_connection; if (beacon && link->conf->csa_active && !link->u.mgd.csa_waiting_bcn) { @@ -1897,7 +1891,7 @@ ieee80211_sta_process_chanswitch(struct 
ieee80211_link_data *link, csa_ie.chandef.chan->center_freq, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.center_freq2); - goto lock_and_drop_connection; + goto drop_connection; } if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef, @@ -1912,7 +1906,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.freq1_offset, csa_ie.chandef.center_freq2); - goto lock_and_drop_connection; + goto drop_connection; } if (cfg80211_chandef_identical(&csa_ie.chandef, @@ -1935,7 +1929,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, */ ieee80211_teardown_tdls_peers(sdata); - mutex_lock(&local->mtx); conf = rcu_dereference_protected(link->conf->chanctx_conf, lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) { @@ -1977,7 +1970,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (link->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&local->mtx); cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, link->link_id, csa_ie.count, @@ -1996,8 +1988,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, &link->u.mgd.chswitch_work, timeout); return; - lock_and_drop_connection: - mutex_lock(&local->mtx); drop_connection: /* * This is just so that the disconnect flow will know that @@ -2011,7 +2001,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->mtx); } static bool @@ -2408,14 +2397,14 @@ void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) struct cfg80211_chan_def chandef = link->conf->chandef; struct ieee80211_sub_if_data *sdata = link->sdata; - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (sdata->wdev.cac_started) { ieee80211_link_release_channel(link); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); } - mutex_unlock(&sdata->local->mtx); } static bool @@ -2682,7 +2671,7 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) { - lockdep_assert_held(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sdata->u.mgd.flags &= ~IEEE80211_STA_CONNECTION_POLL; ieee80211_run_deferred_scan(sdata->local); @@ -2690,9 +2679,9 @@ static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) { - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + __ieee80211_stop_poll(sdata); - mutex_unlock(&sdata->local->mtx); } static u64 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, @@ -2896,6 +2885,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, }; sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -3036,7 +3026,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; sdata->deflink.u.mgd.conn_flags = 0; - mutex_lock(&local->mtx); for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; @@ -3055,7 +3044,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); /* existing TX TSPEC 
sessions no longer exist */ memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); @@ -3074,9 +3062,10 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!(ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)) - goto out; + return; __ieee80211_stop_poll(sdata); @@ -3085,7 +3074,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->iflist_mtx); if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) - goto out; + return; /* * We've received a probe response, but are not sure whether @@ -3097,8 +3086,6 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); -out: - mutex_unlock(&local->mtx); } static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, @@ -3230,6 +3217,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif))) return; @@ -3241,16 +3230,12 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ifmgd->associated) goto out; - mutex_lock(&sdata->local->mtx); - if (sdata->local->tmp_channel || sdata->local->scanning) { - mutex_unlock(&sdata->local->mtx); goto out; } if (sdata->local->suspending) { /* reschedule after resume */ - mutex_unlock(&sdata->local->mtx); ieee80211_reset_ap_probe(sdata); goto out; } @@ -3279,8 +3264,6 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; - mutex_unlock(&sdata->local->mtx); - if (already) goto out; @@ -3363,6 +3346,8 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx; + lockdep_assert_wiphy(local->hw.wiphy); + if (!ifmgd->associated) return; @@ -3398,7 +3383,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING : WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, tx, frame_buf); - mutex_lock(&local->mtx); /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; @@ -3407,7 +3391,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, @@ -3504,6 +3487,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!assoc) { /* @@ -3521,10 +3505,8 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); - mutex_unlock(&sdata->local->mtx); } cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); @@ -3545,6 +3527,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_assoc_data 
*assoc_data = sdata->u.mgd.assoc_data; sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (status != ASSOC_SUCCESS) { /* @@ -3580,10 +3563,8 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, cfg80211_assoc_failure(sdata->dev, &data); } - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); - mutex_unlock(&sdata->local->mtx); } kfree(assoc_data); @@ -4817,6 +4798,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, u32 i; bool have_80mhz; + lockdep_assert_wiphy(local->hw.wiphy); + rcu_read_lock(); ies = rcu_dereference(cbss->ies); @@ -5018,7 +5001,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* will change later if needed */ link->smps_mode = IEEE80211_SMPS_OFF; - mutex_lock(&local->mtx); /* * If this fails (possibly due to channel context sharing * on incompatible channels, e.g. 80+80 and 160 sharing the @@ -5039,7 +5021,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED); } out: - mutex_unlock(&local->mtx); return ret; } @@ -7083,6 +7064,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, int err; bool cont_auth; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* prepare auth data structure */ switch (req->auth_type) { @@ -7227,9 +7210,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, eth_zero_addr(sdata->deflink.u.mgd.bssid); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); } ifmgd->auth_data = NULL; kfree(auth_data); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index b44896e14522..7661e96454b2 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -178,6 +178,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; int err; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (ifocb->joined == true) return -EINVAL; @@ -185,10 +187,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - mutex_lock(&sdata->local->mtx); err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -209,6 +209,8 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ifocb->joined = false; sta_info_flush(sdata); @@ -228,9 +230,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_OCB); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); skb_queue_purge(&sdata->skb_queue); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index df68d9838f87..0e7e7561d0eb 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -197,7 +197,7 @@ static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp; long remaining_dur_min = LONG_MAX; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(roc, tmp, 
&local->roc_list, list) { long remaining; @@ -264,7 +264,7 @@ static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work) container_of(work, struct ieee80211_local, hw_roc_start); struct ieee80211_roc_work *roc; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(roc, &local->roc_list, list) { if (!roc->started) @@ -273,8 +273,6 @@ static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work) roc->hw_begun = true; ieee80211_handle_roc_started(roc, local->hw_roc_start_time); } - - mutex_unlock(&local->mtx); } void ieee80211_ready_on_channel(struct ieee80211_hw *hw) @@ -295,7 +293,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) enum ieee80211_roc_type type; u32 min_dur, max_dur; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(list_empty(&local->roc_list))) return; @@ -386,7 +384,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) { struct ieee80211_roc_work *roc; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (list_empty(&local->roc_list)) { ieee80211_run_deferred_scan(local); @@ -417,7 +415,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local) struct ieee80211_roc_work *roc; bool on_channel; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(local->ops->remain_on_channel)) return; @@ -456,9 +454,9 @@ static void ieee80211_roc_work(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, roc_work.work); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + __ieee80211_roc_work(local); - mutex_unlock(&local->mtx); } static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work) @@ -466,14 +464,12 @@ static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_done); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); ieee80211_end_finished_rocs(local, jiffies); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); - - mutex_unlock(&local->mtx); } void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) @@ -537,7 +533,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, bool queued = false, combine_started = true; int ret; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (channel->freq_offset) /* this may work, but is untested */ @@ -675,15 +671,12 @@ int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; - int ret; - mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL, - IEEE80211_ROC_TYPE_NORMAL); - mutex_unlock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - return ret; + return ieee80211_start_roc_work(local, sdata, chan, + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); } static int ieee80211_cancel_roc(struct ieee80211_local *local, @@ -692,12 +685,13 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (!cookie) return -ENOENT; wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start); - mutex_lock(&local->mtx); 
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) continue; @@ -709,7 +703,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } if (!found) { - mutex_unlock(&local->mtx); return -ENOENT; } @@ -721,7 +714,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, if (local->ops->remain_on_channel) { ret = drv_cancel_remain_on_channel(local, roc->sdata); if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); return ret; } @@ -749,7 +741,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } out_unlock: - mutex_unlock(&local->mtx); return 0; } @@ -778,6 +769,8 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, int ret; u8 *data; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; else @@ -855,8 +848,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (need_offchan && !params->chan) return -EINVAL; - mutex_lock(&local->mtx); - /* Check if the operating channel is the requested channel */ if (!params->chan && mlo_sta) { need_offchan = false; @@ -980,7 +971,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (ret) ieee80211_free_txskb(&local->hw, skb); out_unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1006,7 +996,8 @@ void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp; bool work_to_do = false; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (sdata && roc->sdata != sdata) continue; @@ -1026,5 +1017,4 @@ void ieee80211_roc_purge(struct ieee80211_local *local, } if (work_to_do) __ieee80211_roc_work(local); - mutex_unlock(&local->mtx); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 68ec2124c3db..0ea86a418eda 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -344,7 +344,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) u32 flags = 0; req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; @@ -409,7 +409,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_sub_if_data *sdata; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * It's ok to abort a not-yet-running scan (that @@ -424,7 +424,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) return; scan_sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (hw_scan && !aborted && !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) && @@ -433,7 +433,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) rc = drv_hw_scan(local, rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)), + lockdep_is_held(&local->hw.wiphy->mtx)), local->hw_scan_req); if (rc == 0) @@ -450,7 +450,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) local->hw_scan_req = NULL; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); @@ -591,7 +591,7 @@ static bool 
ieee80211_can_scan(struct ieee80211_local *local, void ieee80211_run_deferred_scan(struct ieee80211_local *local) { - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->scan_req || local->scanning) return; @@ -599,7 +599,7 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local) if (!ieee80211_can_scan(local, rcu_dereference_protected( local->scan_sdata, - lockdep_is_held(&local->mtx)))) + lockdep_is_held(&local->hw.wiphy->mtx)))) return; wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, @@ -644,7 +644,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, u32 flags = 0, tx_flags; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (scan_req->no_cck) @@ -655,7 +655,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, flags |= IEEE80211_PROBE_FLAG_RANDOM_SN; sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); for (i = 0; i < scan_req->n_ssids; i++) ieee80211_send_scan_probe_req( @@ -680,7 +680,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, bool hw_scan = local->ops->hw_scan; int rc; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (local->scan_req) return -EBUSY; @@ -884,7 +884,7 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); next_chan = scan_req->channels[local->scan_channel_idx]; @@ -925,7 +925,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, struct cfg80211_scan_request *scan_req; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); skip = 0; chan = scan_req->channels[local->scan_channel_idx]; @@ -1051,7 +1051,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) unsigned long next_delay = 0; bool aborted; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_can_run_worker(local)) { aborted = true; @@ -1059,9 +1059,9 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) } sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* When scanning on-channel, the first-callback means completed. 
*/ if (test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning)) { @@ -1075,7 +1075,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) } if (!sdata || !scan_req) - goto out; + return; if (!local->scanning) { int rc; @@ -1084,13 +1084,12 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, scan_req); - if (rc) { - /* need to complete scan in cfg80211 */ - rcu_assign_pointer(local->scan_req, scan_req); - aborted = true; - goto out_complete; - } else - goto out; + if (!rc) + return; + /* need to complete scan in cfg80211 */ + rcu_assign_pointer(local->scan_req, scan_req); + aborted = true; + goto out_complete; } clear_bit(SCAN_BEACON_WAIT, &local->scanning); @@ -1138,24 +1137,18 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, next_delay); - goto out; + return; out_complete: __ieee80211_scan_completed(&local->hw, aborted); -out: - mutex_unlock(&local->mtx); } int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { - int res; + lockdep_assert_wiphy(sdata->local->hw.wiphy); - mutex_lock(&sdata->local->mtx); - res = __ieee80211_start_scan(sdata, req); - mutex_unlock(&sdata->local->mtx); - - return res; + return __ieee80211_start_scan(sdata, req); } int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, @@ -1168,7 +1161,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, int ret = -EBUSY, i, n_ch = 0; enum nl80211_band band; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* busy scanning */ if (local->scan_req) @@ -1225,7 +1218,6 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1252,9 +1244,8 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) * after the scan was completed/aborted. 
*/ - mutex_lock(&local->mtx); if (!local->scan_req) - goto out; + return; /* * We have a scan running and the driver already reported completion, @@ -1264,7 +1255,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (test_bit(SCAN_HW_SCANNING, &local->scanning) && test_bit(SCAN_COMPLETED, &local->scanning)) { set_bit(SCAN_HW_CANCELLED, &local->scanning); - goto out; + return; } if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { @@ -1276,16 +1267,14 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx))); - goto out; + lockdep_is_held(&local->hw.wiphy->mtx))); + return; } wiphy_delayed_work_cancel(local->hw.wiphy, &local->scan_work); /* and clean up */ memset(&local->scan_info, 0, sizeof(local->scan_info)); __ieee80211_scan_completed(&local->hw, true); -out: - mutex_unlock(&local->mtx); } int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, @@ -1300,9 +1289,9 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, u8 *ie; u32 flags = 0; - iebufsz = local->scan_ies_len + req->ie_len; + lockdep_assert_wiphy(local->hw.wiphy); - lockdep_assert_held(&local->mtx); + iebufsz = local->scan_ies_len + req->ie_len; if (!local->ops->sched_scan_start) return -ENOTSUPP; @@ -1353,19 +1342,13 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; - int ret; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (rcu_access_pointer(local->sched_scan_sdata)) { - mutex_unlock(&local->mtx); + if (rcu_access_pointer(local->sched_scan_sdata)) return -EBUSY; - } - - ret = __ieee80211_request_sched_scan_start(sdata, req); - mutex_unlock(&local->mtx); - return ret; + return __ieee80211_request_sched_scan_start(sdata, req); } int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) @@ -1373,25 +1356,21 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) struct ieee80211_sub_if_data *sched_scan_sdata; int ret = -ENOENT; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (!local->ops->sched_scan_stop) { - ret = -ENOTSUPP; - goto out; - } + if (!local->ops->sched_scan_stop) + return -ENOTSUPP; /* We don't want to restart sched scan anymore. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sched_scan_sdata) { ret = drv_sched_scan_stop(local, sched_scan_sdata); if (!ret) RCU_INIT_POINTER(local->sched_scan_sdata, NULL); } -out: - mutex_unlock(&local->mtx); return ret; } @@ -1408,20 +1387,16 @@ EXPORT_SYMBOL(ieee80211_sched_scan_results); void ieee80211_sched_scan_end(struct ieee80211_local *local) { - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (!rcu_access_pointer(local->sched_scan_sdata)) { - mutex_unlock(&local->mtx); + if (!rcu_access_pointer(local->sched_scan_sdata)) return; - } RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. 
*/ RCU_INIT_POINTER(local->sched_scan_req, NULL); - mutex_unlock(&local->mtx); - cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index d6d3123f4e3a..9bcb0c2bba7d 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -30,13 +30,13 @@ void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk) u.mgd.tdls_peer_del_work.work); local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) { tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer); sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer); eth_zero_addr(sdata->u.mgd.tdls_peer); } - mutex_unlock(&local->mtx); } static void ieee80211_tdls_add_ext_capab(struct ieee80211_link_data *link, @@ -1180,7 +1180,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, return -ENOTSUPP; } - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* we don't support concurrent TDLS peer setups */ if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) && @@ -1208,7 +1208,6 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, ieee80211_flush_queues(local, sdata, false); memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); - mutex_unlock(&local->mtx); /* we cannot take the mutex while preparing the setup packet */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, @@ -1218,9 +1217,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, extra_ies, extra_ies_len, 0, NULL); if (ret < 0) { - mutex_lock(&local->mtx); eth_zero_addr(sdata->u.mgd.tdls_peer); - mutex_unlock(&local->mtx); return ret; } @@ -1230,7 +1227,6 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, return 0; out_unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1470,7 +1466,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, * ieee80211_bss_info_change_notify() */ sdata_lock(sdata); - mutex_lock(&local->mtx); tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { @@ -1532,7 +1527,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, wiphy_work_queue(sdata->local->hw.wiphy, &sdata->deflink.u.mgd.request_smps_work); - mutex_unlock(&local->mtx); sdata_unlock(sdata); return ret; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1ff7d2368c32..d30b9f204d1b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -6109,6 +6109,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, u32 flags = 0; int err; + /* mutex lock is only needed for incrementing the cookie counter */ + lockdep_assert_wiphy(local->hw.wiphy); + /* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE * or Pre-Authentication */ @@ -6199,15 +6202,10 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); start_xmit: - /* mutex lock is only needed for incrementing the cookie counter */ - mutex_lock(&local->mtx); - local_bh_disable(); __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie); local_bh_enable(); - mutex_unlock(&local->mtx); - return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dce8c2043096..0873c7d60a4d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2861,11 +2861,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reenable_keys(sdata); /* Reconfigure sched scan if it was interrupted by FW restart */ - mutex_lock(&local->mtx); 
sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); sched_scan_req = rcu_dereference_protected(local->sched_scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sched_scan_sdata && sched_scan_req) /* * Sched scan stopped, but we don't want to report it. Instead, @@ -2881,7 +2880,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) RCU_INIT_POINTER(local->sched_scan_req, NULL); sched_scan_stopped = true; } - mutex_unlock(&local->mtx); if (sched_scan_stopped) cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); @@ -2923,9 +2921,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) barrier(); /* Restart deferred ROCs */ - mutex_lock(&local->mtx); ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); /* Requeue all works */ list_for_each_entry(sdata, &local->interfaces, list) @@ -4329,7 +4325,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */ lockdep_assert_wiphy(local->hw.wiphy); - mutex_lock(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started @@ -4347,7 +4342,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) GFP_KERNEL); } } - mutex_unlock(&local->mtx); } void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, @@ -4360,6 +4354,7 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, int num_chanctx = 0; lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; -- cgit v1.2.3 From be0df01dae0f21303a7a523dbba35159cf6dfe77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:06 +0200 Subject: wifi: mac80211: reduce iflist_mtx We now hold the wiphy mutex everywhere that we use or needed the iflist_mtx, so we don't need this mutex any more in mac80211. However, drivers may also iterate, and in some cases (e.g. mt76) do so from high-priority contexts. Thus, keep the mutex around but remove its usage in mac80211 apart from those driver-visible parts that are still needed. Most of this change was done automatically with spatch, with the parts that are still needed as described above reverted manually. 
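To illustrate the conversion pattern (this sketch is editorial, not part of the patch, and example_visit() is a made-up placeholder): an interface-list walk that previously took iflist_mtx itself now only asserts that the caller already holds the wiphy mutex, which is what protects the list from here on.

        /* before: reader serialized on the interface list mutex */
        mutex_lock(&local->iflist_mtx);
        list_for_each_entry(sdata, &local->interfaces, list)
                example_visit(sdata);   /* hypothetical per-interface work */
        mutex_unlock(&local->iflist_mtx);

        /* after: the wiphy mutex already protects the list */
        lockdep_assert_wiphy(local->hw.wiphy);
        list_for_each_entry(sdata, &local->interfaces, list)
                example_visit(sdata);   /* hypothetical per-interface work */

Only the driver-visible iterations that may run in contexts where the wiphy mutex cannot be taken keep using iflist_mtx, which is why the mutex itself remains.
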
Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 ++++---- net/mac80211/ibss.c | 4 ++-- net/mac80211/iface.c | 28 ++++++++++++---------------- net/mac80211/mlme.c | 10 ++-------- net/mac80211/offchannel.c | 8 ++++---- net/mac80211/scan.c | 12 +++++------- net/mac80211/util.c | 1 - 7 files changed, 29 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dada3ca0b9e0..0201a3320136 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2980,6 +2980,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, bool update_txp_type = false; bool has_monitor = false; + lockdep_assert_wiphy(local->hw.wiphy); + if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -3027,7 +3029,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { has_monitor = true; @@ -3043,7 +3044,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, continue; ieee80211_recalc_txpower(sdata, update_txp_type); } - mutex_unlock(&local->iflist_mtx); if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, @@ -4636,6 +4636,8 @@ static void ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, u8 color, int enable, u64 changed) { + lockdep_assert_wiphy(sdata->local->hw.wiphy); + sdata->vif.bss_conf.he_bss_color.color = color; sdata->vif.bss_conf.he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; @@ -4645,7 +4647,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; - mutex_lock(&sdata->local->iflist_mtx); list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; @@ -4655,7 +4656,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_HE_BSS_COLOR); } } - mutex_unlock(&sdata->local->iflist_mtx); } } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 3c6370377234..b95098c13153 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1738,7 +1738,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - mutex_lock(&local->iflist_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -1746,7 +1747,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) continue; sdata->u.ibss.last_scan_completed = jiffies; } - mutex_unlock(&local->iflist_mtx); } int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9724a3d4545b..f7f58c3e8349 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -33,14 +33,13 @@ * The interface list in each struct ieee80211_local is protected * three-fold: * - * (1) modifications may only be done under the RTNL - * (2) modifications and readers are protected against each other by - * the iflist_mtx. - * (3) modifications are done in an RCU manner so atomic readers + * (1) modifications may only be done under the RTNL *and* wiphy mutex + * *and* iflist_mtx + * (2) modifications are done in an RCU manner so atomic readers * can traverse the list in RCU-safe blocks. 
* * As a consequence, reads (traversals) of the list can be protected - * by either the RTNL, the iflist_mtx or RCU. + * by either the RTNL, the wiphy mutex, the iflist_mtx or RCU. */ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work); @@ -160,6 +159,8 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, u8 *m; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + if (is_zero_ether_addr(local->hw.wiphy->addr_mask)) return 0; @@ -176,7 +177,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, if (!check_dup) return ret; - mutex_lock(&local->iflist_mtx); list_for_each_entry(iter, &local->interfaces, list) { if (iter == sdata) continue; @@ -195,7 +195,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, break; } } - mutex_unlock(&local->iflist_mtx); return ret; } @@ -1049,7 +1048,7 @@ void ieee80211_recalc_offload(struct ieee80211_local *local) if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD)) return; - mutex_lock(&local->iflist_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -1057,8 +1056,6 @@ void ieee80211_recalc_offload(struct ieee80211_local *local) ieee80211_recalc_sdata_offload(sdata); } - - mutex_unlock(&local->iflist_mtx); } void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, @@ -1917,6 +1914,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, u8 tmp_addr[ETH_ALEN]; int i; + lockdep_assert_wiphy(local->hw.wiphy); + /* default ... something at least */ memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); @@ -1924,8 +1923,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, local->hw.wiphy->n_addresses <= 1) return; - mutex_lock(&local->iflist_mtx); - switch (type) { case NL80211_IFTYPE_MONITOR: /* doesn't matter */ @@ -1949,7 +1946,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); - goto out_unlock; + return; } } fallthrough; @@ -2035,9 +2032,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } - - out_unlock: - mutex_unlock(&local->iflist_mtx); } int ieee80211_if_add(struct ieee80211_local *local, const char *name, @@ -2051,6 +2045,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, int ret, i; ASSERT_RTNL(); + lockdep_assert_wiphy(local->hw.wiphy); if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; @@ -2217,6 +2212,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { ASSERT_RTNL(); + lockdep_assert_wiphy(sdata->local->hw.wiphy); mutex_lock(&sdata->local->iflist_mtx); list_del_rcu(&sdata->list); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 43bf2f409000..195e7202d51d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2799,6 +2799,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, u64 vif_changed = BSS_CHANGED_ASSOC; unsigned int link_id; + lockdep_assert_wiphy(local->hw.wiphy); + sdata->u.mgd.associated = true; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { @@ -2860,9 +2862,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, vif_changed | changed[0]); } - mutex_lock(&local->iflist_mtx); 
ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); /* leave this here to not change ordering in non-MLO cases */ if (!ieee80211_vif_is_mld(&sdata->vif)) @@ -3069,9 +3069,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) __ieee80211_stop_poll(sdata); - mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) return; @@ -3267,9 +3265,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (already) goto out; - mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_ps(sdata->local); - mutex_unlock(&sdata->local->iflist_mtx); ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); @@ -6094,9 +6090,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, changed |= BSS_CHANGED_BEACON_INFO; link->u.mgd.have_beacon = true; - mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); ieee80211_recalc_ps_vif(sdata); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0e7e7561d0eb..8325fbb1645e 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -84,6 +84,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(local->use_chanctx)) return; @@ -101,7 +103,6 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) false); ieee80211_flush_queues(local, NULL, false); - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -127,17 +128,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) sdata->u.mgd.associated) ieee80211_offchannel_ps_enable(sdata); } - mutex_unlock(&local->iflist_mtx); } void ieee80211_offchannel_return(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(local->use_chanctx)) return; - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) continue; @@ -161,7 +162,6 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) BSS_CHANGED_BEACON_ENABLED); } } - mutex_unlock(&local->iflist_mtx); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0ea86a418eda..58d525e41f6b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -555,20 +555,18 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; + lockdep_assert_wiphy(local->hw.wiphy); + if (!ieee80211_is_radar_required(local)) return true; if (!regulatory_pre_cac_allowed(local->hw.wiphy)) return false; - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata_iter, &local->interfaces, list) { - if (sdata_iter->wdev.cac_started) { - mutex_unlock(&local->iflist_mtx); + if (sdata_iter->wdev.cac_started) return false; - } } - mutex_unlock(&local->iflist_mtx); return true; } @@ -860,12 +858,13 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, enum mac80211_scan_state next_scan_state; struct cfg80211_scan_request *scan_req; + lockdep_assert_wiphy(local->hw.wiphy); + /* * check if at least one STA interface is associated, * check if 
at least one STA interface has pending tx frames * and grab the lowest used beacon interval */ - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -881,7 +880,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } } } - mutex_unlock(&local->iflist_mtx); scan_req = rcu_dereference_protected(local->scan_req, lockdep_is_held(&local->hw.wiphy->mtx)); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0873c7d60a4d..7833043b0a4e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4322,7 +4322,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct cfg80211_chan_def chandef; - /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */ lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { -- cgit v1.2.3 From a7614b482d64a1d7f595178b12d71f12936ba9a3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 14:00:07 +0200 Subject: wifi: mac80211: set wiphy for virtual monitors Drivers might plausibly want to have this, but also the locking assertions will need it later. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f7f58c3e8349..fc407be04ce9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1134,6 +1134,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; mutex_init(&sdata->wdev.mtx); + sdata->wdev.wiphy = local->hw.wiphy; ieee80211_sdata_init(local, sdata); -- cgit v1.2.3 From 076fc8775dafe995e94c106bb732bf2d42dedcea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Aug 2023 12:18:56 +0200 Subject: wifi: cfg80211: remove wdev mutex Since we're now protecting everything with the wiphy mutex (and were really using it for almost everything before), there's no longer any real reason to have a separate wdev mutex. It may feel better, but really has no value. 
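As a rough sketch of the driver-side change (editorial illustration, not taken from the patch; the variable names are generic), a channel-switch notification that used to run under the per-wdev mutex now simply takes the wiphy mutex:

        /* before: cfg80211_ch_switch_notify() required wdev->mtx */
        mutex_lock(&wdev->mtx);
        cfg80211_ch_switch_notify(dev, &chandef, 0, 0);
        mutex_unlock(&wdev->mtx);

        /* after: the wiphy mutex is the only lock involved */
        wiphy_lock(wdev->wiphy);
        cfg80211_ch_switch_notify(dev, &chandef, 0, 0);
        wiphy_unlock(wdev->wiphy);

The ath6kl, mwifiex and qtnfmac hunks below perform this substitution around their channel-switch notifications.
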
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 +- drivers/net/wireless/marvell/mwifiex/11h.c | 4 +- drivers/net/wireless/quantenna/qtnfmac/event.c | 4 +- include/net/cfg80211.h | 6 +- include/net/mac80211.h | 20 +- net/mac80211/cfg.c | 43 +-- net/mac80211/debugfs_netdev.c | 10 +- net/mac80211/driver-ops.c | 2 - net/mac80211/driver-ops.h | 14 - net/mac80211/ibss.c | 39 +-- net/mac80211/ieee80211_i.h | 30 +- net/mac80211/iface.c | 11 - net/mac80211/link.c | 34 +-- net/mac80211/main.c | 7 +- net/mac80211/mesh.c | 16 +- net/mac80211/mlme.c | 123 +++----- net/mac80211/ocb.c | 6 +- net/mac80211/offchannel.c | 2 - net/mac80211/tdls.c | 26 +- net/mac80211/util.c | 5 - net/wireless/ap.c | 24 +- net/wireless/chan.c | 32 +- net/wireless/core.c | 24 +- net/wireless/core.h | 36 --- net/wireless/ibss.c | 76 +---- net/wireless/mesh.c | 23 +- net/wireless/mlme.c | 21 +- net/wireless/nl80211.c | 398 ++++++------------------- net/wireless/ocb.c | 43 +-- net/wireless/pmsr.c | 4 +- net/wireless/reg.c | 16 +- net/wireless/sme.c | 55 ++-- net/wireless/util.c | 14 +- net/wireless/wext-compat.c | 43 +-- net/wireless/wext-sme.c | 59 +--- 35 files changed, 300 insertions(+), 974 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 0c2b8b1a10d5..1dba55c2d9dc 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1118,9 +1118,9 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, ath6kl_band_2ghz.ht_cap.ht_supported) ? NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); - mutex_lock(&vif->wdev.mtx); + wiphy_lock(vif->ar->wiphy); cfg80211_ch_switch_notify(vif->ndev, &chandef, 0, 0); - mutex_unlock(&vif->wdev.mtx); + wiphy_unlock(vif->ar->wiphy); } static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 2ea03725f188..da211372a481 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -287,7 +287,7 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); - mutex_lock(&priv->wdev.mtx); + wiphy_lock(priv->wdev.wiphy); cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0, 0); - mutex_unlock(&priv->wdev.mtx); + wiphy_unlock(priv->wdev.wiphy); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 31bc58e96ac0..3b283e93a13e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -477,9 +477,9 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, if (!vif->netdev) continue; - mutex_lock(&vif->wdev.mtx); + wiphy_lock(priv_to_wiphy(vif->mac)); cfg80211_ch_switch_notify(vif->netdev, &chandef, 0, 0); - mutex_unlock(&vif->wdev.mtx); + wiphy_unlock(priv_to_wiphy(vif->mac)); } return 0; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1964a6d0b35..aa9c26a03f30 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5938,8 +5938,6 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, * @mgmt_registrations: list of registrations for management frames * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver - * @mtx: mutex used to lock data in this struct, may be 
used by drivers - * and some API functions require it held * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL @@ -5985,8 +5983,6 @@ struct wireless_dev { struct list_head mgmt_registrations; u8 mgmt_registrations_need_update:1; - struct mutex mtx; - bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -8589,7 +8585,7 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @link_id: the link ID for MLO, must be 0 for non-MLO * @punct_bitmap: the new puncturing bitmap * - * Caller must acquire wdev_lock, therefore must only be called from sleepable + * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f3b6f00f8a2..154592ce48e5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -643,9 +643,7 @@ struct ieee80211_fils_discovery { * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS - * @csa_active: marks whether a channel switch is going on. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @csa_active: marks whether a channel switch is going on. * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -653,9 +651,7 @@ struct ieee80211_fils_discovery { * path needing to access it; even though the netdev carrier will always * be off when it is %NULL there can still be races and packets could be * processed after it switches back to %NULL. - * @color_change_active: marks whether a color change is ongoing. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @color_change_active: marks whether a color change is ongoing. * @color_change_color: the bss color that will be used after the change. * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability. * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability. 
@@ -1974,22 +1970,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); */ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); -/** - * lockdep_vif_mutex_held - for lockdep checks on link poiners - * @vif: the interface to check - */ -static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) +static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) { - return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx); + return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->wiphy->mtx); } #define link_conf_dereference_protected(vif, link_id) \ rcu_dereference_protected((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) #define link_conf_dereference_check(vif, link_id) \ rcu_dereference_check((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) /** * enum ieee80211_key_flags - key flags diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0201a3320136..851d6ed68367 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -573,8 +573,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, struct ieee80211_key *key; if (link_id >= 0) { - link = rcu_dereference_check(sdata->link[link_id], - lockdep_is_held(&sdata->wdev.mtx)); + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } @@ -896,12 +895,10 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata) { - sdata_lock(sdata); ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, chandef, IEEE80211_CHANCTX_EXCLUSIVE); - sdata_unlock(sdata); } } else { if (local->open_count == local->monitors) { @@ -1490,7 +1487,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_bss_conf *link_conf; u64 changed = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(wiphy); link = sdata_dereference(sdata->link[params->link_id], sdata); if (!link) @@ -1549,7 +1546,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); @@ -2163,14 +2159,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, } } - /* we use sta_info_get_bss() so this might be different */ - if (sdata != sta->sdata) { - mutex_lock_nested(&sta->sdata->wdev.mtx, 1); - err = sta_apply_parameters(local, sta, params); - mutex_unlock(&sta->sdata->wdev.mtx); - } else { - err = sta_apply_parameters(local, sta, params); - } + err = sta_apply_parameters(local, sta, params); if (err) return err; @@ -3132,7 +3121,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; bool tdls_peer_found = false; - lockdep_assert_held(&sdata->wdev.mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; @@ -3211,7 +3200,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - sdata_lock(sdata); for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; @@ -3222,7 +3210,6 @@ static int ieee80211_set_power_mgmt(struct 
wiphy *wiphy, struct net_device *dev, __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } - sdata_unlock(sdata); if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); @@ -3609,7 +3596,6 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) u64 changed = 0; int err; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); /* @@ -3681,20 +3667,16 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) deflink.csa_finalize_work); struct ieee80211_local *local = sdata->local; - sdata_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.bss_conf.csa_active) - goto unlock; + return; if (!ieee80211_sdata_running(sdata)) - goto unlock; + return; ieee80211_csa_finalize(sdata); - -unlock: - sdata_unlock(sdata); } static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, @@ -3850,7 +3832,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, u64 changed = 0; int err; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) @@ -4665,7 +4646,6 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) u64 changed = 0; int err; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); sdata->vif.bss_conf.color_change_active = false; @@ -4692,20 +4672,16 @@ void ieee80211_color_change_finalize_work(struct wiphy *wiphy, deflink.color_change_finalize_work); struct ieee80211_local *local = sdata->local; - sdata_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.bss_conf.color_change_active) - goto unlock; + return; if (!ieee80211_sdata_running(sdata)) - goto unlock; + return; ieee80211_color_change_finalize(sdata); - -unlock: - sdata_unlock(sdata); } void ieee80211_color_collision_detection_work(struct work_struct *work) @@ -4716,9 +4692,7 @@ void ieee80211_color_collision_detection_work(struct work_struct *work) color_collision_detect_work); struct ieee80211_sub_if_data *sdata = link->sdata; - sdata_lock(sdata); cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap); - sdata_unlock(sdata); } void ieee80211_color_change_finish(struct ieee80211_vif *vif) @@ -4762,7 +4736,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, u64 changed = 0; int err; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.bss_conf.nontransmitted) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 706330fadc97..14a40348959a 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -322,7 +322,6 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; - int err; if (sdata->vif.driver_flags & IEEE80211_VIF_DISABLE_SMPS_OVERRIDE) return -EOPNOTSUPP; @@ -340,11 +339,7 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - sdata_lock(sdata); - err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); - sdata_unlock(sdata); - - return err; + return __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); } static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { @@ -416,16 +411,13 @@ static ssize_t 
ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - sdata_lock(sdata); if (!sdata->u.mgd.associated) { - sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - sdata_unlock(sdata); break; default: dev_kfree_skb(skb); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 9fc110264808..919300750527 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -300,7 +300,6 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -329,7 +328,6 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2fac7dc2eb9d..554c7aa10cc2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -996,14 +996,6 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf) -{ - /* deflink always exists, so need to check only for other links */ - if (sdata->deflink.conf != link_conf) - sdata_assert_lock(sdata); -} - int drv_assign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, @@ -1022,9 +1014,6 @@ static inline int drv_start_ap(struct ieee80211_local *local, { int ret = 0; - /* make sure link_conf is protected */ - drv_verify_link_exists(sdata, link_conf); - might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1045,9 +1034,6 @@ static inline void drv_stop_ap(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - /* make sure link_conf is protected */ - drv_verify_link_exists(sdata, link_conf); - if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b95098c13153..9907cea6457c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -235,7 +235,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bool radar_required; int err; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); /* Reset own TSF to allow time synchronization work. */ @@ -403,7 +402,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 rate_flags; int shift; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (beacon_int < 10) beacon_int = 10; @@ -484,7 +483,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, u16 capability = WLAN_CAPABILITY_IBSS; u64 tsf; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; @@ -526,7 +525,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* When not connected/joined, sending CSA doesn't make sense. 
*/ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) @@ -648,7 +647,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); @@ -734,16 +733,12 @@ static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); - sdata_lock(sdata); - ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); - - sdata_unlock(sdata); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) @@ -775,7 +770,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_conn_flags_t conn_flags; u32 vht_cap_info = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); conn_flags = IEEE80211_CONN_DISABLE_VHT; @@ -947,7 +942,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, { u16 auth_alg, auth_transaction; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 6) return; @@ -1289,7 +1284,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; enum nl80211_bss_scan_width scan_width; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -1321,7 +1316,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -1432,7 +1427,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) enum nl80211_bss_scan_width scan_width; int active_ibss; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -1526,7 +1521,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); presp = sdata_dereference(ifibss->presp, sdata); @@ -1622,10 +1617,8 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - sdata_lock(sdata); - if (!sdata->u.ibss.ssid_len) - goto mgmt_out; /* not ready to merge yet */ + return; /* not ready to merge yet */ switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: @@ -1665,9 +1658,6 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } } - - mgmt_out: - sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -1675,15 +1665,13 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - sdata_lock(sdata); - /* * Work could be scheduled after scan or similar * when we aren't even joined (or trying) with a * network. 
*/ if (!ifibss->ssid_len) - goto out; + return; spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1709,9 +1697,6 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) WARN_ON(1); break; } - - out: - sdata_unlock(sdata); } static void ieee80211_ibss_timer(struct timer_list *t) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2d2a4445714e..b8465d205076 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -991,7 +991,7 @@ struct ieee80211_link_data { struct ieee80211_key __rcu *default_beacon_key; struct wiphy_work csa_finalize_work; - bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ + bool csa_block_tx; bool operating_11g_mode; @@ -1135,28 +1135,8 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) - __acquires(&sdata->wdev.mtx) -{ - mutex_lock(&sdata->wdev.mtx); - __acquire(&sdata->wdev.mtx); -} - -static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) - __releases(&sdata->wdev.mtx) -{ - mutex_unlock(&sdata->wdev.mtx); - __release(&sdata->wdev.mtx); -} - #define sdata_dereference(p, sdata) \ - rcu_dereference_protected(p, lockdep_is_held(&sdata->wdev.mtx)) - -static inline void -sdata_assert_lock(struct ieee80211_sub_if_data *sdata) -{ - lockdep_assert_held(&sdata->wdev.mtx); -} + wiphy_dereference(sdata->local->hw.wiphy, p) static inline int ieee80211_chanwidth_get_shift(enum nl80211_chan_width width) @@ -2034,8 +2014,10 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, void ieee80211_link_stop(struct ieee80211_link_data *link); int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, u16 new_links, u16 dormant_links); -void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata); -int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); +static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_vif_set_links(sdata, 0, 0); +} /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fc407be04ce9..7e3acf670f0f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -529,7 +529,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do del_timer_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); - sdata_lock(sdata); WARN(ieee80211_vif_is_mld(&sdata->vif), "destroying interface with valid links 0x%04x\n", sdata->vif.valid_links); @@ -542,7 +541,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - sdata_unlock(sdata); wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); wiphy_work_cancel(local->hw.wiphy, @@ -1133,7 +1131,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; - mutex_init(&sdata->wdev.mtx); sdata->wdev.wiphy = local->hw.wiphy; ieee80211_sdata_init(local, sdata); @@ -1159,17 +1156,14 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); - sdata_lock(sdata); ret = 
ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); - sdata_unlock(sdata); if (ret) { mutex_lock(&local->iflist_mtx); RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); - mutex_destroy(&sdata->wdev.mtx); kfree(sdata); return ret; } @@ -1205,13 +1199,10 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); - sdata_lock(sdata); ieee80211_link_release_channel(&sdata->deflink); - sdata_unlock(sdata); drv_remove_interface(local, sdata); - mutex_destroy(&sdata->wdev.mtx); kfree(sdata); } @@ -2279,13 +2270,11 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) * we can't acquire the wiphy_lock() again there if already * inside this locked section. */ - sdata_lock(sdata); sdata->vif.cfg.arp_addr_cnt = 0; if (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.associated) ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - sdata_unlock(sdata); list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 80571dcc57f5..2a78374f6f04 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -191,7 +191,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS]; bool use_deflink = old_links == 0; /* set for error case */ - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); memset(to_free, 0, sizeof(links)); @@ -303,23 +303,6 @@ int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, return ret; } -void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) -{ - struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS]; - - /* - * The locking here is different because when we free links - * in the station case we need to be able to cancel_work_sync() - * something that also takes the lock. 
- */ - - sdata_lock(sdata); - ieee80211_vif_update_links(sdata, links, 0, 0); - sdata_unlock(sdata); - - ieee80211_free_links(sdata, links); -} - static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, u16 active_links) { @@ -447,14 +430,13 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, return 0; } -int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; u16 old_active; int ret; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); old_active = sdata->vif.active_links; @@ -475,18 +457,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) return ret; } - -int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - int ret; - - sdata_lock(sdata); - ret = __ieee80211_set_active_links(vif, active_links); - sdata_unlock(sdata); - - return ret; -} EXPORT_SYMBOL_GPL(ieee80211_set_active_links); void ieee80211_set_active_links_async(struct ieee80211_vif *vif, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 411e44239bb9..0ab603850a85 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -365,13 +365,10 @@ static void ieee80211_restart_work(struct work_struct *work) */ wiphy_work_cancel(local->hw.wiphy, &sdata->u.mgd.csa_connection_drop_work); - if (sdata->vif.bss_conf.csa_active) { - sdata_lock(sdata); + if (sdata->vif.bss_conf.csa_active) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, false); - sdata_unlock(sdata); - } } wiphy_delayed_work_flush(local->hw.wiphy, &sdata->dec_tailroom_needed_wk); @@ -473,7 +470,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, */ mutex_lock_nested(&local->hw.wiphy->mtx, 1); __acquire(&local->hw.wiphy->mtx); - sdata_lock(sdata); /* Copy the addresses to the vif config list */ ifa = rtnl_dereference(idev->ifa_list); @@ -490,7 +486,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, if (ifmgd->associated) ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - sdata_unlock(sdata); wiphy_unlock(local->hw.wiphy); return NOTIFY_OK; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index af8c5fc2db14..0d0fbae51b61 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1291,7 +1291,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, ieee80211_conn_flags_t conn_flags = 0; u32 vht_cap_info = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sband = ieee80211_get_sband(sdata); if (!sband) @@ -1559,7 +1559,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, struct mesh_csa_settings *tmp_csa_settings; int ret = 0; - lockdep_assert_held(&sdata->wdev.mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings), GFP_ATOMIC); @@ -1691,11 +1691,11 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 stype; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) - goto out; + return; rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; @@ -1714,8 +1714,6 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 
ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } -out: - sdata_unlock(sdata); } static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) @@ -1745,11 +1743,11 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) - goto out; + return; if (ifmsh->preq_queue_len && time_after(jiffies, @@ -1767,8 +1765,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) mesh_bss_info_changed(sdata); -out: - sdata_unlock(sdata); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 195e7202d51d..6d0a29749e8c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0-only /* * BSS client mode implementation * Copyright 2003-2008, Jouni Malinen @@ -175,7 +174,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, static void run_again(struct ieee80211_sub_if_data *sdata, unsigned long timeout) { - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!timer_pending(&sdata->u.mgd.timer) || time_before(timeout, sdata->u.mgd.timer.expires)) @@ -1401,7 +1400,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->ie, assoc_data->ie_len); - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); size = local->hw.extra_tx_headroom + sizeof(*mgmt) + /* bit too much but doesn't matter */ @@ -1689,14 +1688,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return; - sdata_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) - goto out; + return; if (!link->conf->csa_active) - goto out; + return; /* * using reservation isn't immediate as it may be deferred until later @@ -1712,7 +1710,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, * reservations */ if (link->reserved_ready) - goto out; + return; ret = ieee80211_link_use_reserved_context(link); if (ret) { @@ -1721,10 +1719,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, ret); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - goto out; } - - goto out; + return; } if (!cfg80211_chandef_identical(&link->conf->chandef, @@ -1733,16 +1729,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - goto out; + return; } link->u.mgd.csa_waiting_bcn = true; ieee80211_sta_reset_beacon_monitor(sdata); ieee80211_sta_reset_conn_monitor(sdata); - -out: - sdata_unlock(sdata); } static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) @@ -1752,7 +1745,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ret; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON(!link->conf->csa_active); @@ -1846,7 +1839,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, unsigned long timeout; int res; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); if (!cbss) @@ -2884,7 +2876,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, .subtype = stype, }; - sdata_assert_lock(sdata); 
lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON_ONCE(tx && !frame_buf)) @@ -3223,19 +3214,16 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ieee80211_sdata_running(sdata)) return; - sdata_lock(sdata); - if (!ifmgd->associated) - goto out; + return; - if (sdata->local->tmp_channel || sdata->local->scanning) { - goto out; - } + if (sdata->local->tmp_channel || sdata->local->scanning) + return; if (sdata->local->suspending) { /* reschedule after resume */ ieee80211_reset_ap_probe(sdata); - goto out; + return; } if (beacon) { @@ -3263,14 +3251,12 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; if (already) - goto out; + return; ieee80211_recalc_ps(sdata->local); ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); - out: - sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -3283,12 +3269,12 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, const struct element *ssid; int ssid_len; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || ieee80211_vif_is_mld(&sdata->vif))) return NULL; - sdata_assert_lock(sdata); - if (ifmgd->associated) cbss = sdata->deflink.u.mgd.bss; else if (ifmgd->auth_data) @@ -3335,7 +3321,7 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, drv_event_callback(sdata->local, sdata, &event); } -static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -3394,13 +3380,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ifmgd->reconnect = false; } -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) -{ - sdata_lock(sdata); - ___ieee80211_disconnect(sdata); - sdata_unlock(sdata); -} - static void ieee80211_beacon_connection_loss_work(struct wiphy *wiphy, struct wiphy_work *work) { @@ -3482,7 +3461,6 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - sdata_assert_lock(sdata); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!assoc) { @@ -3522,7 +3500,6 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - sdata_assert_lock(sdata); lockdep_assert_wiphy(sdata->local->hw.wiphy); if (status != ASSOC_SUCCESS) { @@ -3638,7 +3615,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, .subtype = IEEE80211_STYPE_AUTH, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 6) return; @@ -3796,7 +3773,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 2) return; @@ -3840,7 +3817,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 2) return; @@ -5253,7 +5230,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct 
ieee80211_sub_if_data *sdata, u8 ap_mld_addr[ETH_ALEN] __aligned(2); unsigned int link_id; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!assoc_data) return; @@ -5453,7 +5430,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_link_data *link, struct ieee80211_bss *bss; struct ieee80211_channel *channel; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); channel = ieee80211_get_channel_khz(local->hw.wiphy, ieee80211_rx_status_to_khz(rx_status)); @@ -5480,7 +5457,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_link_data *link, ifmgd = &sdata->u.mgd; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * According to Draft P802.11ax D6.0 clause 26.17.2.3.2: @@ -5691,21 +5668,16 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, u16 new_valid_links, new_active_links, new_dormant_links; int ret; - sdata_lock(sdata); - if (!sdata->u.mgd.removed_links) { - sdata_unlock(sdata); + if (!sdata->u.mgd.removed_links) return; - } sdata_info(sdata, "MLO Reconfiguration: work: valid=0x%x, removed=0x%x\n", sdata->vif.valid_links, sdata->u.mgd.removed_links); new_valid_links = sdata->vif.valid_links & ~sdata->u.mgd.removed_links; - if (new_valid_links == sdata->vif.valid_links) { - sdata_unlock(sdata); + if (new_valid_links == sdata->vif.valid_links) return; - } if (!new_valid_links || !(new_valid_links & ~sdata->vif.dormant_links)) { @@ -5721,8 +5693,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, BIT(ffs(new_valid_links & ~sdata->vif.dormant_links) - 1); - ret = __ieee80211_set_active_links(&sdata->vif, - new_active_links); + ret = ieee80211_set_active_links(&sdata->vif, new_active_links); if (ret) { sdata_info(sdata, "Failed setting active links\n"); @@ -5741,11 +5712,9 @@ out: if (!ret) cfg80211_links_removed(sdata->dev, sdata->u.mgd.removed_links); else - ___ieee80211_disconnect(sdata); + __ieee80211_disconnect(sdata); sdata->u.mgd.removed_links = 0; - - sdata_unlock(sdata); } static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, @@ -5873,7 +5842,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, .from_ap = true, }; - sdata_assert_lock(sdata); lockdep_assert_wiphy(local->hw.wiphy); /* Process beacon from the current BSS */ @@ -6183,17 +6151,17 @@ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; u16 fc; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + rx_status = (struct ieee80211_rx_status *) skb->cb; hdr = (struct ieee80211_hdr *) skb->data; fc = le16_to_cpu(hdr->frame_control); - sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_S1G_BEACON: ieee80211_rx_mgmt_beacon(link, hdr, skb->len, rx_status); break; } - sdata_unlock(sdata); } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -6205,17 +6173,17 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, u16 fc; int ies_len; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + rx_status = (struct ieee80211_rx_status *) skb->cb; mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - sdata_lock(sdata); - if (rx_status->link_valid) { link = sdata_dereference(sdata->link[rx_status->link_id], sdata); if (!link) - goto out; + return; } switch (fc & IEEE80211_FCTL_STYPE) { @@ -6298,8 +6266,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } -out: - sdata_unlock(sdata); } static void 
ieee80211_sta_timer(struct timer_list *t) @@ -6334,7 +6300,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) .subtype = IEEE80211_STYPE_AUTH, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -6403,7 +6369,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; int ret; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -6459,7 +6425,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -6594,8 +6560,6 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } - - sdata_unlock(sdata); } static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) @@ -6682,7 +6646,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifmgd->auth_data || ifmgd->assoc_data) { const u8 *ap_addr = ifmgd->auth_data ? @@ -6734,8 +6698,6 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) memcpy(bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); ieee80211_mgd_deauth(sdata, &req); } - - sdata_unlock(sdata); } #endif @@ -6743,11 +6705,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - sdata_lock(sdata); - if (!ifmgd->associated) { - sdata_unlock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!ifmgd->associated) return; - } if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; @@ -6755,7 +6716,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, true); - sdata_unlock(sdata); return; } @@ -6765,11 +6725,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, true); - sdata_unlock(sdata); return; } - - sdata_unlock(sdata); } static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy, @@ -6779,10 +6736,8 @@ static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy, container_of(work, struct ieee80211_link_data, u.mgd.request_smps_work); - sdata_lock(link->sdata); __ieee80211_request_smps_mgd(link->sdata, link, link->u.mgd.driver_smps_mode); - sdata_unlock(link->sdata); } /* interface setup */ @@ -7830,7 +7785,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ml_reconf_work); - sdata_lock(sdata); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT); if (ifmgd->auth_data) @@ -7846,7 +7800,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) ifmgd->assoc_req_ies_len = 0; spin_unlock_bh(&ifmgd->teardown_lock); del_timer_sync(&ifmgd->timer); - sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 7661e96454b2..6e2965ffb809 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -124,11 +124,11 
@@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (ifocb->joined != true) return; - sdata_lock(sdata); - spin_lock_bh(&ifocb->incomplete_lock); while (!list_empty(&ifocb->incomplete_stations)) { sta = list_first_entry(&ifocb->incomplete_stations, @@ -144,8 +144,6 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags)) ieee80211_ocb_housekeeping(sdata); - - sdata_unlock(sdata); } static void ieee80211_ocb_housekeeping_timer(struct timer_list *t) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 8325fbb1645e..be377ed12baa 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -826,13 +826,11 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - sdata_lock(sdata); if (!sdata->u.mgd.associated || (params->offchan && params->wait && local->ops->remain_on_channel && memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) need_offchan = true; - sdata_unlock(sdata); break; case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 9bcb0c2bba7d..ba14f570cda7 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1465,22 +1465,18 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, /* protect possible bss_conf changes and avoid concurrency in * ieee80211_bss_info_change_notify() */ - sdata_lock(sdata); tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { case NL80211_TDLS_ENABLE_LINK: if (sdata->vif.bss_conf.csa_active) { tdls_dbg(sdata, "TDLS: disallow link during CSA\n"); - ret = -EBUSY; - break; + return -EBUSY; } sta = sta_info_get(sdata, peer); - if (!sta) { - ret = -ENOLINK; - break; - } + if (!sta) + return -ENOLINK; iee80211_tdls_recalc_chanctx(sdata, sta); iee80211_tdls_recalc_ht_protection(sdata, sta); @@ -1489,7 +1485,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) || !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)); - ret = 0; break; case NL80211_TDLS_DISABLE_LINK: /* @@ -1511,24 +1506,23 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, iee80211_tdls_recalc_ht_protection(sdata, NULL); iee80211_tdls_recalc_chanctx(sdata, NULL); + if (ret) + return ret; break; default: - ret = -ENOTSUPP; - break; + return -ENOTSUPP; } - if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { + if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.tdls_peer_del_work); eth_zero_addr(sdata->u.mgd.tdls_peer); } - if (ret == 0) - wiphy_work_queue(sdata->local->hw.wiphy, - &sdata->deflink.u.mgd.request_smps_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.u.mgd.request_smps_work); - sdata_unlock(sdata); - return ret; + return 0; } void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7833043b0a4e..4aefb9483aa9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2664,7 +2664,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - sdata_lock(sdata); if (ieee80211_vif_is_mld(&sdata->vif)) { struct ieee80211_bss_conf 
*old[IEEE80211_MLD_MAX_NUM_LINKS] = { [0] = &sdata->vif.bss_conf, @@ -2796,7 +2795,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_NAN: res = ieee80211_reconfig_nan(sdata); if (res < 0) { - sdata_unlock(sdata); ieee80211_handle_reconfig_failure(local); return res; } @@ -2814,7 +2812,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(1); break; } - sdata_unlock(sdata); if (active_links) ieee80211_set_active_links(&sdata->vif, active_links); @@ -2844,7 +2841,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - sdata_lock(sdata); switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: @@ -2853,7 +2849,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) default: break; } - sdata_unlock(sdata); } /* add back keys */ diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 0962770303b2..9a9a870806f5 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Parts of this file are - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include #include @@ -18,7 +18,7 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->stop_ap) return -EOPNOTSUPP; @@ -52,9 +52,9 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link_id, - bool notify) +int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, int link_id, + bool notify) { unsigned int link; int ret = 0; @@ -72,17 +72,3 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return ret; } - -int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link_id, - bool notify) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_stop_ap(rdev, dev, link_id, notify); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b2469e2c1e70..2af3aaee7493 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -713,7 +713,7 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { unsigned int link; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: @@ -782,18 +782,14 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, { struct wireless_dev *wdev; + lockdep_assert_wiphy(wiphy); + list_for_each_entry(wdev, &wiphy->wdev_list, list) { - wdev_lock(wdev); - if (!cfg80211_beaconing_iface_active(wdev)) { - wdev_unlock(wdev); + if (!cfg80211_beaconing_iface_active(wdev)) continue; - } - if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) { - wdev_unlock(wdev); + if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) return true; - } - wdev_unlock(wdev); } return false; @@ -1325,10 +1321,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { bool ret; - wdev_lock(wdev); ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan); - wdev_unlock(wdev); - if (ret) return ret; } @@ -1437,17 +1430,10 @@ EXPORT_SYMBOL(cfg80211_any_usable_channels); struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, unsigned int link_id) { - /* - * We need to 
sort out the locking here - in some cases - * where we get here we really just don't care (yet) - * about the valid links, but in others we do. But we - * get here with various driver cases, so we cannot - * easily require the wdev mutex. - */ - if (link_id || wdev->valid_links & BIT(0)) { - ASSERT_WDEV_LOCK(wdev); - WARN_ON(!(wdev->valid_links & BIT(link_id))); - } + lockdep_assert_wiphy(wdev->wiphy); + + WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id))); + WARN_ON(!wdev->valid_links && link_id > 0); switch (wdev->iftype) { case NL80211_IFTYPE_MESH_POINT: diff --git a/net/wireless/core.c b/net/wireless/core.c index 88042a647aaa..c419177278da 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1278,14 +1278,13 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void __cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; lockdep_assert_held(&rdev->wiphy.mtx); - ASSERT_WDEV_LOCK(wdev); cfg80211_pmsr_wdev_down(wdev); @@ -1293,7 +1292,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - __cfg80211_leave_ibss(rdev, dev, true); + cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: @@ -1313,14 +1312,14 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, WLAN_REASON_DEAUTH_LEAVING, true); break; case NL80211_IFTYPE_MESH_POINT: - __cfg80211_leave_mesh(rdev, dev); + cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, dev, -1, true); + cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_OCB: - __cfg80211_leave_ocb(rdev, dev); + cfg80211_leave_ocb(rdev, dev); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: @@ -1338,14 +1337,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, } } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) -{ - wdev_lock(wdev); - __cfg80211_leave(rdev, wdev); - wdev_unlock(wdev); -} - void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp) { @@ -1370,7 +1361,6 @@ EXPORT_SYMBOL(cfg80211_stop_iface); void cfg80211_init_wdev(struct wireless_dev *wdev) { - mutex_init(&wdev->mtx); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); @@ -1533,7 +1523,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_UP: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, 1); - wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT case NL80211_IFTYPE_ADHOC: @@ -1563,7 +1552,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, default: break; } - wdev_unlock(wdev); rdev->opencount++; /* diff --git a/net/wireless/core.h b/net/wireless/core.h index 5dc76ea3b84e..98f41d9d2ba7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -235,28 +235,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev); void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); -static inline void wdev_lock(struct wireless_dev *wdev) - __acquires(wdev) -{ - lockdep_assert_held(&wdev->wiphy->mtx); - mutex_lock(&wdev->mtx); - 
__acquire(wdev->mtx); -} - -static inline void wdev_unlock(struct wireless_dev *wdev) - __releases(wdev) -{ - lockdep_assert_held(&wdev->wiphy->mtx); - __release(wdev->mtx); - mutex_unlock(&wdev->mtx); -} - -static inline void ASSERT_WDEV_LOCK(struct wireless_dev *wdev) -{ - lockdep_assert_held(&wdev->wiphy->mtx); - lockdep_assert_held(&wdev->mtx); -} - static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { lockdep_assert_held(&rdev->wiphy.mtx); @@ -340,8 +318,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); -int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, @@ -356,8 +332,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); -int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -365,21 +339,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* OCB */ -int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup); int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); -int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev); int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); /* AP */ -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link, - bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link, bool notify); @@ -557,8 +523,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); -void __cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index e6fdb0b8187d..9f02ee5f08be 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. 
* * Copyright 2009 Johannes Berg - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include @@ -93,7 +93,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, int err; lockdep_assert_held(&rdev->wiphy.mtx); - ASSERT_WDEV_LOCK(wdev); if (wdev->u.ibss.ssid_len) return -EALREADY; @@ -151,13 +150,13 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return 0; } -static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) +void cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -187,22 +186,13 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) cfg80211_sched_dfs_chan_update(rdev); } -void cfg80211_clear_ibss(struct net_device *dev, bool nowext) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_clear_ibss(dev, nowext); - wdev_unlock(wdev); -} - -int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) +int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->u.ibss.ssid_len) return -ENOLINK; @@ -213,24 +203,11 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, return err; wdev->conn_owner_nlportid = 0; - __cfg80211_clear_ibss(dev, nowext); + cfg80211_clear_ibss(dev, nowext); return 0; } -int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_ibss(rdev, dev, nowext); - wdev_unlock(wdev); - - return err; -} - #ifdef CONFIG_CFG80211_WEXT int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) @@ -239,7 +216,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, enum nl80211_band band; int i, err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->wext.ibss.beacon_interval) wdev->wext.ibss.beacon_interval = 100; @@ -336,11 +313,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (wdev->wext.ibss.chandef.chan == chan) return 0; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -354,11 +329,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwfreq(struct net_device *dev, @@ -372,12 +343,10 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; - wdev_lock(wdev); if (wdev->u.ibss.current_bss) chan = wdev->u.ibss.current_bss->pub.channel; else if (wdev->wext.ibss.chandef.chan) chan = wdev->wext.ibss.chandef.chan; - wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -405,11 +374,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (!rdev->ops->join_ibss) 
return -EOPNOTSUPP; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -422,11 +389,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, wdev->wext.ibss.ssid = wdev->u.ibss.ssid; wdev->wext.ibss.ssid_len = len; - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwessid(struct net_device *dev, @@ -441,7 +404,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 0; - wdev_lock(wdev); if (wdev->u.ibss.ssid_len) { data->flags = 1; data->length = wdev->u.ibss.ssid_len; @@ -451,7 +413,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->length = wdev->wext.ibss.ssid_len; memcpy(ssid, wdev->wext.ibss.ssid, data->length); } - wdev_unlock(wdev); return 0; } @@ -491,11 +452,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.ibss.bssid)) return 0; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -506,11 +465,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwap(struct net_device *dev, @@ -525,7 +480,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - wdev_lock(wdev); if (wdev->u.ibss.current_bss) memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid, ETH_ALEN); @@ -534,8 +488,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, else eth_zero_addr(ap_addr->sa_data); - wdev_unlock(wdev); - return 0; } #endif diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 59a3c5c092b1..dc75abdb8f2e 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include #include @@ -109,7 +109,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; @@ -257,13 +257,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; @@ -287,16 +287,3 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, return err; } - -int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_mesh(rdev, dev); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 775cac4d6100..cc7ae9ea84ea 100644 
--- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022 Intel Corporation + * Copyright (C) 2019-2020, 2022-2023 Intel Corporation */ #include @@ -149,7 +149,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_mgmt *mgmt = (void *)buf; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_rx_mlme_mgmt(dev, buf, len); @@ -214,7 +214,7 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len, struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_mgmt *mgmt = (void *)buf; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_tx_mlme_mgmt(dev, buf, len, reconnect); @@ -262,7 +262,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!req->bss) return -ENOENT; @@ -331,7 +331,7 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i, j; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); for (i = 1; i < ARRAY_SIZE(req->links); i++) { if (!req->links[i].bss) @@ -393,7 +393,7 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, .local_state_change = local_state_change, }; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (local_state_change && (!wdev->connected || @@ -423,7 +423,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, }; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->connected) return -ENOTCONN; @@ -446,7 +446,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; u8 bssid[ETH_ALEN]; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->deauth) return; @@ -726,6 +726,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, const struct ieee80211_mgmt *mgmt; u16 stype; + lockdep_assert_wiphy(&rdev->wiphy); + if (!wdev->wiphy->mgmt_stypes) return -EOPNOTSUPP; @@ -748,8 +750,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { int err = 0; - wdev_lock(wdev); - switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: /* @@ -814,7 +814,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, err = -EOPNOTSUPP; break; } - wdev_unlock(wdev); if (err) return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f4298104a2f4..71a0a6e34bdb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1544,7 +1544,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, static int nl80211_key_allowed(struct wireless_dev *wdev) { - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: @@ -3423,13 +3423,8 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; int link_id = nl80211_link_id_or_invalid(info->attrs); struct net_device *netdev = info->user_ptr[1]; - int ret; - - wdev_lock(netdev->ieee80211_ptr); - ret = __nl80211_set_channel(rdev, netdev, info, link_id); - wdev_unlock(netdev->ieee80211_ptr); - return ret; + return 
__nl80211_set_channel(rdev, netdev, info, link_id); } static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) @@ -3536,7 +3531,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); - wdev_lock(netdev->ieee80211_ptr); if (txq_params.link_id >= 0 && !(netdev->ieee80211_ptr->valid_links & BIT(txq_params.link_id))) @@ -3547,7 +3541,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) else result = rdev_set_txq_params(rdev, netdev, &txq_params); - wdev_unlock(netdev->ieee80211_ptr); if (result) goto out; } @@ -3557,12 +3550,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) int link_id = nl80211_link_id_or_invalid(info->attrs); if (wdev) { - wdev_lock(wdev); result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, info, link_id); - wdev_unlock(wdev); } else { result = __nl80211_set_channel(rdev, netdev, info, link_id); } @@ -3870,33 +3861,31 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->u.ap.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, wdev->u.ap.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->u.client.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, wdev->u.client.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, wdev->u.ibss.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; default: /* nothing */ break; } - wdev_unlock(wdev); if (rdev->ops->get_txq_stats) { struct cfg80211_txq_stats txqstats = {}; @@ -3943,8 +3932,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; - nla_put_failure_locked: - wdev_unlock(wdev); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -4191,7 +4178,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = @@ -4199,7 +4185,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); - wdev_unlock(wdev); } if (info->attrs[NL80211_ATTR_4ADDR]) { @@ -4300,7 +4285,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) break; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = @@ -4308,7 +4292,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); - wdev_unlock(wdev); break; case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: @@ -4599,79 +4582,67 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; - wdev_lock(wdev); - if (key.def) { - if (!rdev->ops->set_default_key) { - err = -EOPNOTSUPP; - goto out; - } + if 
(!rdev->ops->set_default_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; err = rdev_set_default_key(rdev, dev, link_id, key.idx, key.def_uni, key.def_multi); if (err) - goto out; + return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = key.idx; #endif + return 0; } else if (key.defmgmt) { - if (key.def_uni || !key.def_multi) { - err = -EINVAL; - goto out; - } + if (key.def_uni || !key.def_multi) + return -EINVAL; - if (!rdev->ops->set_default_mgmt_key) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_default_mgmt_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); if (err) - goto out; + return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_mgmt_key = key.idx; #endif + return 0; } else if (key.defbeacon) { - if (key.def_uni || !key.def_multi) { - err = -EINVAL; - goto out; - } + if (key.def_uni || !key.def_multi) + return -EINVAL; - if (!rdev->ops->set_default_beacon_key) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_default_beacon_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; - err = rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); - if (err) - goto out; + return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { @@ -4680,25 +4651,19 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (!mac_addr || key.idx < 0 || key.idx > 1) { - err = -EINVAL; - goto out; - } + if (!mac_addr || key.idx < 0 || key.idx > 1) + return -EINVAL; err = nl80211_validate_key_link_id(info, wdev, link_id, true); if (err) - goto out; + return err; - err = rdev_add_key(rdev, dev, link_id, key.idx, - NL80211_KEYTYPE_PAIRWISE, - mac_addr, &key.p); - } else { - err = -EINVAL; + return rdev_add_key(rdev, dev, link_id, key.idx, + NL80211_KEYTYPE_PAIRWISE, + mac_addr, &key.p); } - out: - wdev_unlock(wdev); - return err; + return -EINVAL; } static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) @@ -4751,7 +4716,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); err = nl80211_key_allowed(wdev); if (err) GENL_SET_ERR_MSG(info, "key not allowed"); @@ -4767,7 +4731,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (err) GENL_SET_ERR_MSG(info, "key addition failed"); } - wdev_unlock(wdev); return err; } @@ -4808,7 +4771,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_key) return -EOPNOTSUPP; - wdev_lock(wdev); err = nl80211_key_allowed(wdev); if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && @@ -4832,7 +4794,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev->wext.default_mgmt_key = -1; } #endif - wdev_unlock(wdev); return err; } @@ -6072,20 +6033,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - wdev_lock(wdev); - if 
(info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, ¶ms->beacon_rate, dev, false, link_id); if (err) - goto out_unlock; + goto out; err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, ¶ms->beacon_rate); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { @@ -6098,19 +6057,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_STATIC_SMPS)) { err = -EINVAL; - goto out_unlock; + goto out; } break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & NL80211_FEATURE_DYNAMIC_SMPS)) { err = -EINVAL; - goto out_unlock; + goto out; } break; default: err = -EINVAL; - goto out_unlock; + goto out; } } else { params->smps_mode = NL80211_SMPS_OFF; @@ -6119,7 +6078,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { err = -EOPNOTSUPP; - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_ACL_POLICY]) { @@ -6127,7 +6086,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(params->acl)) { err = PTR_ERR(params->acl); params->acl = NULL; - goto out_unlock; + goto out; } } @@ -6139,7 +6098,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms->he_obss_pd); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { @@ -6147,7 +6106,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_FILS_DISCOVERY], params); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { @@ -6155,7 +6114,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], params); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { @@ -6166,17 +6125,17 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->beacon.mbssid_ies->cnt : 0); if (err) - goto out_unlock; + goto out; } if (!params->mbssid_config.ema && params->beacon.rnr_ies) { err = -EINVAL; - goto out_unlock; + goto out; } err = nl80211_calculate_ap_params(params); if (err) - goto out_unlock; + goto out; if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) params->flags = nla_get_u32( @@ -6188,7 +6147,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_SOCKET_OWNER] && wdev->conn_owner_nlportid != info->snd_portid) { err = -EINVAL; - goto out_unlock; + goto out; } /* FIXME: validate MLO/link-id against driver capabilities */ @@ -6206,8 +6165,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) nl80211_send_ap_started(wdev, link_id); } -out_unlock: - wdev_unlock(wdev); out: kfree(params->acl); kfree(params->beacon.mbssid_ies); @@ -6244,9 +6201,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto out; - wdev_lock(wdev); err = rdev_change_beacon(rdev, dev, ¶ms); - wdev_unlock(wdev); out: kfree(params.mbssid_ies); @@ -7305,9 +7260,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) } /* driver will call cfg80211_check_station_change() */ - wdev_lock(dev->ieee80211_ptr); err = rdev_change_station(rdev, dev, mac_addr, ¶ms); - wdev_unlock(dev->ieee80211_ptr); 
out_put_vlan: dev_put(params.vlan); @@ -7575,7 +7528,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* be aware of params.vlan when changing code here */ - wdev_lock(dev->ieee80211_ptr); if (wdev->valid_links) { if (params.link_sta_params.link_id < 0) { err = -EINVAL; @@ -7593,7 +7545,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) } err = rdev_add_station(rdev, dev, mac_addr, ¶ms); out: - wdev_unlock(dev->ieee80211_ptr); dev_put(params.vlan); return err; } @@ -7603,7 +7554,6 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct station_del_parameters params; - int ret; memset(¶ms, 0, sizeof(params)); @@ -7651,11 +7601,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID; } - wdev_lock(dev->ieee80211_ptr); - ret = rdev_del_station(rdev, dev, ¶ms); - wdev_unlock(dev->ieee80211_ptr); - - return ret; + return rdev_del_station(rdev, dev, ¶ms); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, @@ -7974,9 +7920,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; struct bss_parameters params; - int err; memset(¶ms, 0, sizeof(params)); params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -8039,11 +7983,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - wdev_lock(wdev); - err = rdev_change_bss(rdev, dev, ¶ms); - wdev_unlock(wdev); - - return err; + return rdev_change_bss(rdev, dev, ¶ms); } static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -8114,13 +8054,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, if (!rdev->ops->get_mesh_config) return -EOPNOTSUPP; - wdev_lock(wdev); /* If not connected, get default parameters */ if (!wdev->u.mesh.id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else err = rdev_get_mesh_config(rdev, dev, &cur_params); - wdev_unlock(wdev); if (err) return err; @@ -8496,15 +8434,12 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, if (err) return err; - wdev_lock(wdev); if (!wdev->u.mesh.id_len) err = -ENOLINK; if (!err) err = rdev_update_mesh_config(rdev, dev, mask, &cfg); - wdev_unlock(wdev); - return err; } @@ -8995,7 +8930,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, unsigned int link_id; bool all_ok = true; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!cfg80211_beaconing_iface_active(wdev)) return true; @@ -9245,7 +9180,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; - wdev_lock(wdev); for (i = 0; i < request->n_channels; i++) { struct ieee80211_channel *chan = request->channels[i]; @@ -9254,12 +9188,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) continue; if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { - wdev_unlock(wdev); err = -EBUSY; goto out_free; } } - wdev_unlock(wdev); i = 0; if (n_ssids) { @@ -10265,9 +10197,7 @@ skip_beacons: goto free; } - wdev_lock(wdev); err = rdev_channel_switch(rdev, dev, ¶ms); - wdev_unlock(wdev); free: 
kfree(params.beacon_after.mbssid_ies); @@ -10290,7 +10220,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, void *hdr; struct nlattr *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); @@ -10439,7 +10369,6 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); - wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); /* @@ -10465,7 +10394,6 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) } spin_unlock_bh(&rdev->bss_lock); - wdev_unlock(wdev); cb->args[2] = idx; wiphy_unlock(&rdev->wiphy); @@ -10588,9 +10516,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) } while (1) { - wdev_lock(wdev); res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); - wdev_unlock(wdev); if (res == -ENOENT) break; if (res) @@ -10763,9 +10689,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!req.bss) return -ENOENT; - wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_auth(rdev, dev, &req); - wdev_unlock(dev->ieee80211_ptr); cfg80211_put_bss(&rdev->wiphy, req.bss); @@ -11180,8 +11104,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_mlme_assoc(rdev, dev, &req); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { @@ -11190,8 +11112,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) memcpy(dev->ieee80211_ptr->disconnect_bssid, ap_addr, ETH_ALEN); } - - wdev_unlock(dev->ieee80211_ptr); } free: @@ -11208,7 +11128,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0, err; + int ie_len = 0; u16 reason_code; bool local_state_change; @@ -11244,11 +11164,8 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); - wdev_unlock(dev->ieee80211_ptr); - return err; + return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -11256,7 +11173,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0, err; + int ie_len = 0; u16 reason_code; bool local_state_change; @@ -11292,11 +11209,8 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); - wdev_unlock(dev->ieee80211_ptr); - return err; + return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); } static bool @@ -11474,13 +11388,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct 
genl_info *info) ibss.userspace_handles_dfs = nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); - wdev_lock(dev->ieee80211_ptr); err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys); if (err) kfree_sensitive(connkeys); else if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; - wdev_unlock(dev->ieee80211_ptr); return err; } @@ -12013,8 +11925,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT])) connect.flags |= CONNECT_REQ_MLO_SUPPORT; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_connect(rdev, dev, &connect, connkeys, connect.prev_bssid); if (err) @@ -12029,8 +11939,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid); } - wdev_unlock(dev->ieee80211_ptr); - return err; } @@ -12044,7 +11952,6 @@ static int nl80211_update_connect_params(struct sk_buff *skb, bool fils_sk_offload; u32 auth_type; u32 changed = 0; - int ret; if (!rdev->ops->update_connect_params) return -EOPNOTSUPP; @@ -12105,14 +12012,10 @@ static int nl80211_update_connect_params(struct sk_buff *skb, changed |= UPDATE_AUTH_TYPE; } - wdev_lock(dev->ieee80211_ptr); if (!wdev->connected) - ret = -ENOLINK; - else - ret = rdev_update_connect_params(rdev, dev, &connect, changed); - wdev_unlock(dev->ieee80211_ptr); + return -ENOLINK; - return ret; + return rdev_update_connect_params(rdev, dev, &connect, changed); } static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) @@ -12120,7 +12023,6 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 reason; - int ret; if (dev->ieee80211_ptr->conn_owner_nlportid && dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) @@ -12138,10 +12040,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - wdev_lock(dev->ieee80211_ptr); - ret = cfg80211_disconnect(rdev, dev, reason, true); - wdev_unlock(dev->ieee80211_ptr); - return ret; + return cfg80211_disconnect(rdev, dev, reason, true); } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) @@ -12352,7 +12251,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (err) return err; - wdev_lock(wdev); if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { const struct cfg80211_chan_def *oper_chandef, *compat_chandef; @@ -12361,7 +12259,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (WARN_ON(!oper_chandef)) { /* cannot happen since we must beacon to get here */ WARN_ON(1); - wdev_unlock(wdev); return -EBUSY; } @@ -12369,12 +12266,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, compat_chandef = cfg80211_chandef_compatible(&chandef, oper_chandef); - if (compat_chandef != &chandef) { - wdev_unlock(wdev); + if (compat_chandef != &chandef) return -EBUSY; - } } - wdev_unlock(wdev); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -12433,23 +12327,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - wdev_lock(wdev); err = 
nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true, link_id); if (err) - goto out; + return err; - err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); -out: - wdev_unlock(wdev); - return err; + return rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) @@ -12578,12 +12467,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; - wdev_lock(wdev); if (params.offchan && - !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { - wdev_unlock(wdev); + !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) return -EBUSY; - } params.link_id = nl80211_link_id_or_invalid(info->attrs); /* @@ -12592,11 +12478,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) * to the driver. */ if (params.link_id >= 0 && - !(wdev->valid_links & BIT(params.link_id))) { - wdev_unlock(wdev); + !(wdev->valid_links & BIT(params.link_id))) return -EINVAL; - } - wdev_unlock(wdev); params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); @@ -12866,8 +12749,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - int i, err; s32 prev = S32_MIN; + int i; /* Check all values negative and sorted */ for (i = 0; i < n_thresholds; i++) { @@ -12881,9 +12764,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - wdev_lock(wdev); cfg80211_cqm_config_free(wdev); - wdev_unlock(wdev); if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ @@ -12900,17 +12781,14 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; - wdev_lock(wdev); if (n_thresholds) { struct cfg80211_cqm_config *cqm_config; cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), GFP_KERNEL); - if (!cqm_config) { - err = -ENOMEM; - goto unlock; - } + if (!cqm_config) + return -ENOMEM; cqm_config->rssi_hyst = hysteresis; cqm_config->n_rssi_thresholds = n_thresholds; @@ -12921,12 +12799,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->cqm_config = cqm_config; } - err = cfg80211_cqm_rssi_update(rdev, dev); - -unlock: - wdev_unlock(wdev); - - return err; + return cfg80211_cqm_rssi_update(rdev, dev); } static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) @@ -13108,11 +12981,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.control_port_over_nl80211 = true; } - wdev_lock(dev->ieee80211_ptr); err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; - wdev_unlock(dev->ieee80211_ptr); return err; } @@ -14056,21 +13927,13 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_REKEY_DATA_AKM]) rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); - wdev_lock(wdev); - if (!wdev->connected) { - err = -ENOTCONN; - goto out; - } + if (!wdev->connected) + return -ENOTCONN; - if (!rdev->ops->set_rekey_data) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_rekey_data) + return 
-EOPNOTSUPP; - err = rdev_set_rekey_data(rdev, dev, &rekey_data); - out: - wdev_unlock(wdev); - return err; + return rdev_set_rekey_data(rdev, dev, &rekey_data); } static int nl80211_register_unexpected_frame(struct sk_buff *skb, @@ -15274,11 +15137,9 @@ static int nl80211_set_qos_map(struct sk_buff *skb, memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN); } - wdev_lock(dev->ieee80211_ptr); ret = nl80211_key_allowed(dev->ieee80211_ptr); if (!ret) ret = rdev_set_qos_map(rdev, dev, qos_map); - wdev_unlock(dev->ieee80211_ptr); kfree(qos_map); return ret; @@ -15292,7 +15153,6 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) const u8 *peer; u8 tsid, up; u16 admitted_time = 0; - int err; if (!(rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)) return -EOPNOTSUPP; @@ -15322,34 +15182,25 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; default: - err = -EOPNOTSUPP; - goto out; + return -EOPNOTSUPP; } - err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); - - out: - wdev_unlock(wdev); - return err; + return rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); } static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *peer; u8 tsid; - int err; if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -15357,11 +15208,7 @@ static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); peer = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(wdev); - err = rdev_del_tx_ts(rdev, dev, tsid, peer); - wdev_unlock(wdev); - - return err; + return rdev_del_tx_ts(rdev, dev, tsid, peer); } static int nl80211_tdls_channel_switch(struct sk_buff *skb, @@ -15417,11 +15264,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, addr = nla_data(info->attrs[NL80211_ATTR_MAC]); oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]); - wdev_lock(wdev); - err = rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); - wdev_unlock(wdev); - - return err; + return rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); } static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, @@ -15429,7 +15272,6 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *addr; if (!rdev->ops->tdls_channel_switch || @@ -15450,9 +15292,7 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(wdev); rdev_tdls_cancel_channel_switch(rdev, dev, addr); - wdev_unlock(wdev); return 0; } @@ -15485,7 +15325,6 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_pmk_conf pmk_conf = {}; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -15498,34 +15337,24 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info 
*info) if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK]) return -EINVAL; - wdev_lock(wdev); - if (!wdev->connected) { - ret = -ENOTCONN; - goto out; - } + if (!wdev->connected) + return -ENOTCONN; pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) { - ret = -EINVAL; - goto out; - } + if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) + return -EINVAL; pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); if (pmk_conf.pmk_len != WLAN_PMK_LEN && - pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) { - ret = -EINVAL; - goto out; - } + pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) + return -EINVAL; if (info->attrs[NL80211_ATTR_PMKR0_NAME]) pmk_conf.pmk_r0_name = nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]); - ret = rdev_set_pmk(rdev, dev, &pmk_conf); -out: - wdev_unlock(wdev); - return ret; + return rdev_set_pmk(rdev, dev, &pmk_conf); } static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) @@ -15534,7 +15363,6 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *aa; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -15547,12 +15375,8 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - wdev_lock(wdev); aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - ret = rdev_del_pmk(rdev, dev, aa); - wdev_unlock(wdev); - - return ret; + return rdev_del_pmk(rdev, dev, aa); } static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) @@ -15626,8 +15450,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); - switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -15636,21 +15458,16 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; default: - err = -EOPNOTSUPP; - goto out; + return -EOPNOTSUPP; } - wdev_unlock(wdev); - buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = nla_len(info->attrs[NL80211_ATTR_FRAME]); dest = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -15666,9 +15483,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) if (!err && !dont_wait_for_ack) nl_set_extack_cookie_u64(info->extack, cookie); return err; - out: - wdev_unlock(wdev); - return err; } static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, @@ -15946,8 +15760,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_MAC]) tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(dev->ieee80211_ptr); - nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], rem_conf) { ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX, @@ -15969,7 +15781,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb, bad_tid_conf: kfree(tid_config); - wdev_unlock(dev->ieee80211_ptr); return ret; } @@ -16066,9 +15877,7 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) params.counter_offset_presp = offset; } - 
wdev_lock(wdev); err = rdev_color_change(rdev, dev, ¶ms); - wdev_unlock(wdev); out: kfree(params.beacon_next.mbssid_ies); @@ -16124,7 +15933,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) return -EINVAL; - wdev_lock(wdev); wdev->valid_links |= BIT(link_id); ether_addr_copy(wdev->links[link_id].addr, nla_data(info->attrs[NL80211_ATTR_MAC])); @@ -16134,7 +15942,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) wdev->valid_links &= ~BIT(link_id); eth_zero_addr(wdev->links[link_id].addr); } - wdev_unlock(wdev); return ret; } @@ -16156,9 +15963,7 @@ static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); cfg80211_remove_link(wdev, link_id); - wdev_unlock(wdev); return 0; } @@ -16248,14 +16053,10 @@ nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info, if (err) return err; - wdev_lock(dev->ieee80211_ptr); if (add) - err = rdev_add_link_station(rdev, dev, ¶ms); - else - err = rdev_mod_link_station(rdev, dev, ¶ms); - wdev_unlock(dev->ieee80211_ptr); + return rdev_add_link_station(rdev, dev, ¶ms); - return err; + return rdev_mod_link_station(rdev, dev, ¶ms); } static int @@ -16276,7 +16077,6 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) struct link_station_del_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - int ret; if (!rdev->ops->del_link_station) return -EOPNOTSUPP; @@ -16288,11 +16088,7 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); - wdev_lock(dev->ieee80211_ptr); - ret = rdev_del_link_station(rdev, dev, ¶ms); - wdev_unlock(dev->ieee80211_ptr); - - return ret; + return rdev_del_link_station(rdev, dev, ¶ms); } static int nl80211_set_hw_timestamp(struct sk_buff *skb, @@ -18300,7 +18096,7 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask) struct nlattr *links; void *hdr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_links_removed(dev, link_mask); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && @@ -19354,7 +19150,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap); @@ -19399,7 +19195,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id, @@ -19422,7 +19218,7 @@ int cfg80211_bss_color_notify(struct net_device *dev, struct sk_buff *msg; void *hdr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap); diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index 29afaf3da54f..7d2d67f13ad9 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -4,7 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research - * Copyright 
(C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * Author: Rostislav Lisovy * Funded by: Volkswagen Group Research */ @@ -15,14 +15,14 @@ #include "core.h" #include "rdev-ops.h" -int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup) +int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; @@ -40,27 +40,13 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_join_ocb(rdev, dev, setup); - wdev_unlock(wdev); - - return err; -} - -int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; @@ -77,16 +63,3 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, return err; } - -int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_ocb(rdev, dev); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 9611aa0bd051..e106dcea3977 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -600,7 +600,7 @@ static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev) struct cfg80211_pmsr_request *req, *tmp; LIST_HEAD(free_list); - lockdep_assert_held(&wdev->mtx); + lockdep_assert_wiphy(wdev->wiphy); spin_lock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) { @@ -623,9 +623,7 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) pmsr_free_wk); wiphy_lock(wdev->wiphy); - wdev_lock(wdev); cfg80211_pmsr_process_abort(wdev); - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1cdaf273d775..f86ee1a6daad 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2342,12 +2342,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) bool ret; int link; - wdev_lock(wdev); iftype = wdev->iftype; /* make sure the interface is active */ if (!wdev->netdev || !netif_running(wdev->netdev)) - goto wdev_inactive_unlock; + return true; for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { struct ieee80211_channel *chan; @@ -2407,8 +2406,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) break; } - wdev_unlock(wdev); - switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -2429,16 +2426,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) default: break; } - - wdev_lock(wdev); } - wdev_unlock(wdev); - - return true; - -wdev_inactive_unlock: - wdev_unlock(wdev); return true; } @@ -3577,13 +3566,10 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) for_each_rdev(rdev) { 
wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - wdev_lock(wdev); if (!(wdev->wiphy->regulatory_flags & flag)) { - wdev_unlock(wdev); wiphy_unlock(&rdev->wiphy); return false; } - wdev_unlock(wdev); } wiphy_unlock(&rdev->wiphy); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c271f30b58fa..50fcb27e6dab 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -67,7 +67,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) struct cfg80211_scan_request *request; int n_channels, err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (rdev->scan_req || rdev->scan_msg) return -EBUSY; @@ -151,7 +151,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev, struct cfg80211_assoc_request req = {}; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return 0; @@ -255,16 +255,13 @@ void cfg80211_conn_work(struct work_struct *work) if (!wdev->netdev) continue; - wdev_lock(wdev); - if (!netif_running(wdev->netdev)) { - wdev_unlock(wdev); + if (!netif_running(wdev->netdev)) continue; - } + if (!wdev->conn || - wdev->conn->state == CFG80211_CONN_CONNECTED) { - wdev_unlock(wdev); + wdev->conn->state == CFG80211_CONN_CONNECTED) continue; - } + if (wdev->conn->params.bssid) { memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; @@ -279,7 +276,6 @@ void cfg80211_conn_work(struct work_struct *work) cr.timeout_reason = treason; __cfg80211_connect_result(wdev->netdev, &cr, false); } - wdev_unlock(wdev); } wiphy_unlock(&rdev->wiphy); @@ -300,7 +296,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, wdev->conn->params.bssid, @@ -317,13 +313,13 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) return bss; } -static void __cfg80211_sme_scan_done(struct net_device *dev) +void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return; @@ -339,15 +335,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) schedule_work(&rdev->conn_work); } -void cfg80211_sme_scan_done(struct net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_sme_scan_done(dev); - wdev_unlock(wdev); -} - void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; @@ -355,7 +342,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; @@ -705,11 +692,9 @@ static bool cfg80211_is_all_idle(void) for_each_rdev(rdev) { wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - wdev_lock(wdev); if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; - wdev_unlock(wdev); } wiphy_unlock(&rdev->wiphy); } @@ -763,7 +748,7 @@ void __cfg80211_connect_result(struct 
net_device *dev, const u8 *connected_addr; bool bss_not_found = false; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1095,7 +1080,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, unsigned int link; const u8 *connected_addr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1299,7 +1284,7 @@ EXPORT_SYMBOL(cfg80211_roamed); void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, const u8 *td_bitmap, u8 td_bitmap_len) { - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1355,7 +1340,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, union iwreq_data wrqu; #endif - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1445,7 +1430,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); /* * If we have an ssid_len, we're trying to connect or are @@ -1551,7 +1536,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err = 0; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -1587,19 +1572,18 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); wiphy_lock(wdev->wiphy); - wdev_lock(wdev); if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - __cfg80211_leave_ibss(rdev, wdev->netdev, false); + cfg80211_leave_ibss(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, -1, false); + cfg80211_stop_ap(rdev, wdev->netdev, -1, false); break; case NL80211_IFTYPE_MESH_POINT: - __cfg80211_leave_mesh(rdev, wdev->netdev); + cfg80211_leave_mesh(rdev, wdev->netdev); break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: @@ -1624,6 +1608,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) } } - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 1783ab9d57a3..fff99fe43fdd 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1044,7 +1044,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) list_del(&ev->list); spin_unlock_irqrestore(&wdev->event_lock, flags); - wdev_lock(wdev); switch (ev->type) { case EVENT_CONNECT_RESULT: __cfg80211_connect_result( @@ -1066,7 +1065,7 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->ij.channel); break; case EVENT_STOPPED: - __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; case EVENT_PORT_AUTHORIZED: __cfg80211_port_authorized(wdev, ev->pa.bssid, @@ -1074,7 +1073,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->pa.td_bitmap_len); break; } - wdev_unlock(wdev); kfree(ev); @@ -1124,9 +1122,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; dev->ieee80211_ptr->use_4addr = false; - 
wdev_lock(dev->ieee80211_ptr); rdev_set_qos_map(rdev, dev, NULL); - wdev_unlock(dev->ieee80211_ptr); switch (otype) { case NL80211_IFTYPE_AP: @@ -1138,10 +1134,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ @@ -2647,12 +2641,12 @@ void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, link_id, true); + cfg80211_stop_ap(rdev, wdev->netdev, link_id, true); break; default: /* per-link not relevant */ @@ -2677,12 +2671,10 @@ void cfg80211_remove_links(struct wireless_dev *wdev) if (wdev->iftype != NL80211_IFTYPE_AP) return; - wdev_lock(wdev); if (wdev->valid_links) { for_each_valid_link(wdev, link_id) cfg80211_remove_link(wdev, link_id); } - wdev_unlock(wdev); } int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index e3acfac7430a..d23ce088bffa 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -415,10 +415,10 @@ int cfg80211_wext_giwretry(struct net_device *dev, } EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry); -static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool pairwise, - const u8 *addr, bool remove, bool tx_key, - int idx, struct key_params *params) +static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool pairwise, + const u8 *addr, bool remove, bool tx_key, + int idx, struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; @@ -471,7 +471,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, */ if (idx == wdev->wext.default_key && wdev->iftype == NL80211_IFTYPE_ADHOC) { - __cfg80211_leave_ibss(rdev, wdev->netdev, true); + cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } @@ -552,7 +552,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, */ if (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->wext.default_key == -1) { - __cfg80211_leave_ibss(rdev, wdev->netdev, true); + cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } err = rdev_set_default_key(rdev, dev, -1, idx, true, @@ -580,21 +580,6 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return 0; } -static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool pairwise, - const u8 *addr, bool remove, bool tx_key, - int idx, struct key_params *params) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, - remove, tx_key, idx, params); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) @@ -639,7 +624,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev, else if (erq->length == 0) { /* No key data - just set the default TX key index */ err = 0; - wdev_lock(wdev); if 
(wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) @@ -647,7 +631,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev, true); if (!err) wdev->wext.default_key = idx; - wdev_unlock(wdev); goto out; } @@ -697,12 +680,8 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, !rdev->ops->set_default_key) return -EOPNOTSUPP; - wdev_lock(wdev); - if (wdev->valid_links) { - wdev_unlock(wdev); + if (wdev->valid_links) return -EOPNOTSUPP; - } - wdev_unlock(wdev); switch (ext->alg) { case IW_ENCODE_ALG_NONE: @@ -1341,13 +1320,11 @@ static int cfg80211_wext_giwrate(struct net_device *dev, return -EOPNOTSUPP; err = 0; - wdev_lock(wdev); if (!wdev->valid_links && wdev->links[0].client.current_bss) memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else err = -EOPNOTSUPP; - wdev_unlock(wdev); if (err) return err; @@ -1387,17 +1364,15 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) return NULL; /* Grab BSSID of current BSS, if any */ - wdev_lock(wdev); + wiphy_lock(&rdev->wiphy); if (wdev->valid_links || !wdev->links[0].client.current_bss) { - wdev_unlock(wdev); + wiphy_unlock(&rdev->wiphy); return NULL; } memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); - wdev_unlock(wdev); memset(&sinfo, 0, sizeof(sinfo)); - wiphy_lock(&rdev->wiphy); ret = rdev_get_station(rdev, dev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index f3eaa3388694..8edd9ada69d0 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -23,7 +23,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, int err, i; ASSERT_RTNL(); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!netif_running(wdev->netdev)) return 0; @@ -87,15 +87,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - wdev_lock(wdev); - if (wdev->conn) { bool event = true; - if (wdev->wext.connect.channel == chan) { - err = 0; - goto out; - } + if (wdev->wext.connect.channel == chan) + return 0; /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) @@ -103,14 +99,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) - goto out; + return err; } wdev->wext.connect.channel = chan; - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwfreq(struct net_device *dev, @@ -127,12 +120,10 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, if (wdev->valid_links) return -EOPNOTSUPP; - wdev_lock(wdev); if (wdev->links[0].client.current_bss) chan = wdev->links[0].client.current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; - wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -164,17 +155,13 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - wdev_lock(wdev); - - err = 0; - if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && len == wdev->wext.connect.ssid_len && memcmp(wdev->wext.connect.ssid, ssid, len) == 0) - goto out; + return 0; /* if SSID set now, we'll try to connect, avoid event */ if (len) @@ -182,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) - goto out; + 
return err; } wdev->wext.prev_bssid_valid = false; @@ -194,10 +181,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.crypto.control_port_ethertype = cpu_to_be16(ETH_P_PAE); - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwessid(struct net_device *dev, @@ -216,7 +200,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, data->flags = 0; - wdev_lock(wdev); if (wdev->links[0].client.current_bss) { const struct element *ssid_elem; @@ -238,7 +221,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, data->length = wdev->wext.connect.ssid_len; memcpy(ssid, wdev->wext.connect.ssid, data->length); } - wdev_unlock(wdev); return ret; } @@ -263,23 +245,20 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - wdev_lock(wdev); - if (wdev->conn) { - err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) - goto out; + return 0; /* fixed already - and no change */ if (wdev->wext.connect.bssid && bssid && ether_addr_equal(bssid, wdev->wext.connect.bssid)) - goto out; + return 0; err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); if (err) - goto out; + return err; } if (bssid) { @@ -288,10 +267,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, } else wdev->wext.connect.bssid = NULL; - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwap(struct net_device *dev, @@ -306,18 +282,15 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - wdev_lock(wdev); - if (wdev->valid_links) { - wdev_unlock(wdev); + if (wdev->valid_links) return -EOPNOTSUPP; - } + if (wdev->links[0].client.current_bss) memcpy(ap_addr->sa_data, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); - wdev_unlock(wdev); return 0; } @@ -339,7 +312,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev, ie = NULL; wiphy_lock(wdev->wiphy); - wdev_lock(wdev); /* no change */ err = 0; @@ -370,7 +342,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev, /* userspace better not think we'll reconnect */ err = 0; out: - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); return err; } @@ -396,7 +367,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, return -EINVAL; wiphy_lock(&rdev->wiphy); - wdev_lock(wdev); switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: @@ -406,7 +376,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, err = -EOPNOTSUPP; break; } - wdev_unlock(wdev); wiphy_unlock(&rdev->wiphy); return err; -- cgit v1.2.3 From b7600aae8a20ff267ae82ff3c51cef536cd6383d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Aug 2023 11:25:50 +0200 Subject: wifi: mac80211: fix TXQ error path and cleanup We currently call ieee80211_txq_teardown_flows() as part of ieee80211_remove_interfaces(), but that's not really right in case of HW registration failures, specifically rate control. Call it separately to fix that issue. 
Reported-by: Zhengchao Shao Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 2 -- net/mac80211/main.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7e3acf670f0f..510f8aead4f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2255,8 +2255,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); - ieee80211_txq_teardown_flows(local); - mutex_lock(&local->iflist_mtx); list_splice_init(&local->interfaces, &unreg_list); mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0ab603850a85..eabf6c1bf3ff 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1457,6 +1457,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); rtnl_unlock(); fail_rate: + ieee80211_txq_teardown_flows(local); fail_flows: ieee80211_led_exit(local); destroy_workqueue(local->workqueue); @@ -1493,6 +1494,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) */ ieee80211_remove_interfaces(local); + ieee80211_txq_teardown_flows(local); + wiphy_lock(local->hw.wiphy); wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); -- cgit v1.2.3 From 05f136220d17839eb7c155f015ace9152f603225 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Aug 2023 20:16:11 +0200 Subject: wifi: mac80211: fix BA session teardown race As previously reported by Alexander, whose commit 69403bad97aa ("wifi: mac80211: sdata can be NULL during AMPDU start") I'm reverting as part of this commit, there's a race between station destruction and aggregation setup, where the aggregation setup can happen while the station is being removed and queue the work after ieee80211_sta_tear_down_BA_sessions() has already run in __sta_info_destroy_part1(), and thus the worker will run with a now freed station. In his case, this manifested in a NULL sdata pointer, but really there's no guarantee whatsoever. The real issue seems to be that it's possible at all to have a situation where this occurs - we want to stop the BA sessions when doing _part1, but we cannot be sure, and WLAN_STA_BLOCK_BA isn't necessarily effective since we don't know that the setup isn't concurrently running and already got past the check. Simply call ieee80211_sta_tear_down_BA_sessions() again in the second part of station destruction, since at that point really nothing else can hold a reference to the station any more. Also revert the sdata checks since those are just misleading at this point. 
Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 6 +----- net/mac80211/driver-ops.c | 3 --- net/mac80211/sta_info.c | 14 ++++++++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 0627abb09f0e..b8a278355e18 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -497,7 +497,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_START, @@ -525,7 +525,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); - sdata = sta->sdata; params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, &params); tid_tx->ssn = params.ssn; @@ -539,9 +538,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); } else if (ret) { - if (!sdata) - return; - ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 919300750527..169dbbca54b6 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -409,9 +409,6 @@ int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - if (!sdata) - return -EIO; - sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) return -EIO; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index abcc280acd38..2a61269a4b54 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1385,6 +1385,20 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) * after _part1 and before _part2! */ + /* + * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA + * but someone might have just gotten past a check, and not yet into + * queuing the work/creating the data/etc. + * + * Do another round of destruction so that the worker is certainly + * canceled before we later free the station. + * + * Since this is after synchronize_rcu()/synchronize_net() we're now + * certain that nobody can actually hold a reference to the STA and + * be calling e.g. ieee80211_start_tx_ba_session(). + */ + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); + might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); -- cgit v1.2.3 From e8c1841278a78362f7034f3de415096ddb19f097 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 28 Aug 2023 09:54:39 +0200 Subject: wifi: cfg80211: annotate iftype_data pointer with sparse There were a number of cases in mac80211 and iwlwifi (at least) that used the sband->iftype_data pointer directly, instead of using the accessors to find the right array entry to use. Make sparse warn when such a thing is done.
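As a minimal sketch of the access pattern this enforces (a hypothetical helper; it only uses the existing ieee80211_get_sband_iftype_data() accessor from include/net/cfg80211.h, with the station iftype chosen purely as an example):

/* illustrative helper, not part of this series */
static const struct ieee80211_sta_he_cap *
sta_he_cap_example(const struct ieee80211_supported_band *sband)
{
	const struct ieee80211_sband_iftype_data *data;

	/* dereferencing sband->iftype_data[...] directly now gets a sparse warning */
	data = ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);

	return data && data->he_cap.has_he ? &data->he_cap : NULL;
}

(This is essentially what the existing ieee80211_get_he_iftype_cap() helper already wraps.)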
To not have a lot of casts, add two helper functions/macros - ieee80211_set_sband_iftype_data() - for_each_sband_iftype_data() Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 15 +++--- drivers/net/wireless/ath/ath12k/mac.c | 12 ++--- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 4 +- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 9 ++-- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 9 ++-- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 3 +- drivers/net/wireless/quantenna/qtnfmac/commands.c | 5 +- drivers/net/wireless/quantenna/qtnfmac/core.c | 2 +- drivers/net/wireless/realtek/rtw89/core.c | 15 +++--- drivers/net/wireless/realtek/rtw89/regd.c | 2 +- drivers/net/wireless/virtual/mac80211_hwsim.c | 30 +++++------ include/net/cfg80211.h | 59 ++++++++++++++++++++-- net/mac80211/main.c | 7 +-- net/wireless/chan.c | 5 +- net/wireless/core.c | 8 ++- net/wireless/nl80211.c | 6 +-- 16 files changed, 112 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index c071bf5841af..6ed036b51dba 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -5893,8 +5893,9 @@ static void ath11k_mac_setup_he_cap(struct ath11k *ar, ar->mac.iftype[NL80211_BAND_2GHZ], NL80211_BAND_2GHZ); band = &ar->mac.sbands[NL80211_BAND_2GHZ]; - band->iftype_data = ar->mac.iftype[NL80211_BAND_2GHZ]; - band->n_iftype_data = count; + _ieee80211_set_sband_iftype_data(band, + ar->mac.iftype[NL80211_BAND_2GHZ], + count); } if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP) { @@ -5902,8 +5903,9 @@ static void ath11k_mac_setup_he_cap(struct ath11k *ar, ar->mac.iftype[NL80211_BAND_5GHZ], NL80211_BAND_5GHZ); band = &ar->mac.sbands[NL80211_BAND_5GHZ]; - band->iftype_data = ar->mac.iftype[NL80211_BAND_5GHZ]; - band->n_iftype_data = count; + _ieee80211_set_sband_iftype_data(band, + ar->mac.iftype[NL80211_BAND_5GHZ], + count); } if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && @@ -5912,8 +5914,9 @@ static void ath11k_mac_setup_he_cap(struct ath11k *ar, ar->mac.iftype[NL80211_BAND_6GHZ], NL80211_BAND_6GHZ); band = &ar->mac.sbands[NL80211_BAND_6GHZ]; - band->iftype_data = ar->mac.iftype[NL80211_BAND_6GHZ]; - band->n_iftype_data = count; + _ieee80211_set_sband_iftype_data(band, + ar->mac.iftype[NL80211_BAND_6GHZ], + count); } } diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 88346e66bb75..24113709972d 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4647,8 +4647,8 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, ar->mac.iftype[band], band); sband = &ar->mac.sbands[band]; - sband->iftype_data = ar->mac.iftype[band]; - sband->n_iftype_data = count; + _ieee80211_set_sband_iftype_data(sband, ar->mac.iftype[band], + count); } if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP) { @@ -4657,8 +4657,8 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, ar->mac.iftype[band], band); sband = &ar->mac.sbands[band]; - sband->iftype_data = ar->mac.iftype[band]; - sband->n_iftype_data = count; + _ieee80211_set_sband_iftype_data(sband, ar->mac.iftype[band], + count); } if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && @@ -4668,8 +4668,8 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, ar->mac.iftype[band], band); sband = &ar->mac.sbands[band]; - sband->iftype_data = ar->mac.iftype[band]; - sband->n_iftype_data = count; + 
_ieee80211_set_sband_iftype_data(sband, ar->mac.iftype[band], + count); } } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 31176897b746..cff1f97536e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -1077,8 +1077,8 @@ static void iwl_init_he_hw_capab(struct iwl_trans *trans, memcpy(iftype_data, iwl_he_eht_capa, sizeof(iwl_he_eht_capa)); - sband->iftype_data = iftype_data; - sband->n_iftype_data = ARRAY_SIZE(iwl_he_eht_capa); + _ieee80211_set_sband_iftype_data(sband, iftype_data, + ARRAY_SIZE(iwl_he_eht_capa)); for (i = 0; i < sband->n_iftype_data; i++) iwl_nvm_fixup_sband_iftd(trans, data, sband, &iftype_data[i], diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 35fdf4f98d80..b27d04e02aba 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -1127,8 +1127,7 @@ void mt7915_set_stream_he_caps(struct mt7915_phy *phy) n = mt7915_init_he_caps(phy, NL80211_BAND_2GHZ, data); band = &phy->mt76->sband_2g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); } if (phy->mt76->cap.has_5ghz) { @@ -1136,8 +1135,7 @@ void mt7915_set_stream_he_caps(struct mt7915_phy *phy) n = mt7915_init_he_caps(phy, NL80211_BAND_5GHZ, data); band = &phy->mt76->sband_5g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); } if (phy->mt76->cap.has_6ghz) { @@ -1145,8 +1143,7 @@ void mt7915_set_stream_he_caps(struct mt7915_phy *phy) n = mt7915_init_he_caps(phy, NL80211_BAND_6GHZ, data); band = &phy->mt76->sband_6g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 0844d28b3223..62e6da1386aa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -196,8 +196,7 @@ void mt7921_set_stream_he_caps(struct mt792x_phy *phy) n = mt7921_init_he_caps(phy, NL80211_BAND_2GHZ, data); band = &phy->mt76->sband_2g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); } if (phy->mt76->cap.has_5ghz) { @@ -205,16 +204,14 @@ void mt7921_set_stream_he_caps(struct mt792x_phy *phy) n = mt7921_init_he_caps(phy, NL80211_BAND_5GHZ, data); band = &phy->mt76->sband_5g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); if (phy->mt76->cap.has_6ghz) { data = phy->iftype[NL80211_BAND_6GHZ]; n = mt7921_init_he_caps(phy, NL80211_BAND_6GHZ, data); band = &phy->mt76->sband_6g.sband; - band->iftype_data = data; - band->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(band, data, n); } } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 26e03b28935f..0d6cc214ce10 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -827,8 +827,7 @@ __mt7996_set_stream_he_eht_caps(struct mt7996_phy *phy, n++; } - sband->iftype_data = data; - sband->n_iftype_data = n; + _ieee80211_set_sband_iftype_data(sband, data, n); } void mt7996_set_stream_he_eht_caps(struct mt7996_phy *phy) 
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 68ae9c7ea95a..9540ad6196d7 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1335,7 +1335,7 @@ static int qtnf_cmd_band_fill_iftype(const u8 *data, return -EINVAL; } - kfree(band->iftype_data); + kfree((__force void *)band->iftype_data); band->iftype_data = NULL; band->n_iftype_data = tlv->n_iftype_data; if (band->n_iftype_data == 0) @@ -1347,7 +1347,8 @@ static int qtnf_cmd_band_fill_iftype(const u8 *data, band->n_iftype_data = 0; return -ENOMEM; } - band->iftype_data = iftype_data; + + _ieee80211_set_sband_iftype_data(band, iftype_data, tlv->n_iftype_data); for (i = 0; i < band->n_iftype_data; i++) qtnf_cmd_conv_iftype(iftype_data++, &tlv->iftype_data[i]); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 2a63ffdc4b2c..677bac835330 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -535,7 +535,7 @@ static void qtnf_core_mac_detach(struct qtnf_bus *bus, unsigned int macid) if (!wiphy->bands[band]) continue; - kfree(wiphy->bands[band]->iftype_data); + kfree((__force void *)wiphy->bands[band]->iftype_data); wiphy->bands[band]->n_iftype_data = 0; kfree(wiphy->bands[band]->channels); diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 856f3543eff2..fc686954b3dd 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -3359,8 +3359,7 @@ static void rtw89_init_he_cap(struct rtw89_dev *rtwdev, idx++; } - sband->iftype_data = iftype_data; - sband->n_iftype_data = idx; + _ieee80211_set_sband_iftype_data(sband, iftype_data, idx); } static int rtw89_core_set_supported_band(struct rtw89_dev *rtwdev) @@ -3405,11 +3404,11 @@ err: hw->wiphy->bands[NL80211_BAND_5GHZ] = NULL; hw->wiphy->bands[NL80211_BAND_6GHZ] = NULL; if (sband_2ghz) - kfree(sband_2ghz->iftype_data); + kfree((__force void *)sband_2ghz->iftype_data); if (sband_5ghz) - kfree(sband_5ghz->iftype_data); + kfree((__force void *)sband_5ghz->iftype_data); if (sband_6ghz) - kfree(sband_6ghz->iftype_data); + kfree((__force void *)sband_6ghz->iftype_data); kfree(sband_2ghz); kfree(sband_5ghz); kfree(sband_6ghz); @@ -3421,11 +3420,11 @@ static void rtw89_core_clr_supported_band(struct rtw89_dev *rtwdev) struct ieee80211_hw *hw = rtwdev->hw; if (hw->wiphy->bands[NL80211_BAND_2GHZ]) - kfree(hw->wiphy->bands[NL80211_BAND_2GHZ]->iftype_data); + kfree((__force void *)hw->wiphy->bands[NL80211_BAND_2GHZ]->iftype_data); if (hw->wiphy->bands[NL80211_BAND_5GHZ]) - kfree(hw->wiphy->bands[NL80211_BAND_5GHZ]->iftype_data); + kfree((__force void *)hw->wiphy->bands[NL80211_BAND_5GHZ]->iftype_data); if (hw->wiphy->bands[NL80211_BAND_6GHZ]) - kfree(hw->wiphy->bands[NL80211_BAND_6GHZ]->iftype_data); + kfree((__force void *)hw->wiphy->bands[NL80211_BAND_6GHZ]->iftype_data); kfree(hw->wiphy->bands[NL80211_BAND_2GHZ]); kfree(hw->wiphy->bands[NL80211_BAND_5GHZ]); kfree(hw->wiphy->bands[NL80211_BAND_6GHZ]); diff --git a/drivers/net/wireless/realtek/rtw89/regd.c b/drivers/net/wireless/realtek/rtw89/regd.c index 9e2328db1865..c956a8b971c6 100644 --- a/drivers/net/wireless/realtek/rtw89/regd.c +++ b/drivers/net/wireless/realtek/rtw89/regd.c @@ -377,7 +377,7 @@ bottom: return; wiphy->bands[NL80211_BAND_6GHZ] = NULL; - kfree(sband->iftype_data); + kfree((__force 
void *)sband->iftype_data); kfree(sband); } diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index f5a0880da3fc..36f2d2388ddd 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -4899,25 +4899,19 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { static void mac80211_hwsim_sband_capab(struct ieee80211_supported_band *sband) { - u16 n_iftype_data; - - if (sband->band == NL80211_BAND_2GHZ) { - n_iftype_data = ARRAY_SIZE(sband_capa_2ghz); - sband->iftype_data = - (struct ieee80211_sband_iftype_data *)sband_capa_2ghz; - } else if (sband->band == NL80211_BAND_5GHZ) { - n_iftype_data = ARRAY_SIZE(sband_capa_5ghz); - sband->iftype_data = - (struct ieee80211_sband_iftype_data *)sband_capa_5ghz; - } else if (sband->band == NL80211_BAND_6GHZ) { - n_iftype_data = ARRAY_SIZE(sband_capa_6ghz); - sband->iftype_data = - (struct ieee80211_sband_iftype_data *)sband_capa_6ghz; - } else { - return; + switch (sband->band) { + case NL80211_BAND_2GHZ: + ieee80211_set_sband_iftype_data(sband, sband_capa_2ghz); + break; + case NL80211_BAND_5GHZ: + ieee80211_set_sband_iftype_data(sband, sband_capa_5ghz); + break; + case NL80211_BAND_6GHZ: + ieee80211_set_sband_iftype_data(sband, sband_capa_6ghz); + break; + default: + break; } - - sband->n_iftype_data = n_iftype_data; } #ifdef CONFIG_MAC80211_MESH diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aa9c26a03f30..922fd9e0d9b4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -410,6 +410,19 @@ struct ieee80211_sta_eht_cap { u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ +#ifdef __CHECKER__ +/* + * This is used to mark the sband->iftype_data pointer which is supposed + * to be an array with special access semantics (per iftype), but a lot + * of code got it wrong in the past, so with this marking sparse will be + * noisy when the pointer is used directly. + */ +# define __iftd __attribute__((noderef, address_space(__iftype_data))) +#else +# define __iftd +#endif /* __CHECKER__ */ + /** * struct ieee80211_sband_iftype_data - sband data per interface type * @@ -543,9 +556,47 @@ struct ieee80211_supported_band { struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_edmg edmg_cap; u16 n_iftype_data; - const struct ieee80211_sband_iftype_data *iftype_data; + const struct ieee80211_sband_iftype_data __iftd *iftype_data; }; +/** + * _ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array pointer + * @n_iftd: the length of the iftype data array + * + * Set the sband iftype data array; use this where the length cannot + * be derived from the ARRAY_SIZE() of the argument, but prefer + * ieee80211_set_sband_iftype_data() where it can be used. 
+ */ +static inline void +_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband, + const struct ieee80211_sband_iftype_data *iftd, + u16 n_iftd) +{ + sband->iftype_data = (const void __iftd __force *)iftd; + sband->n_iftype_data = n_iftd; +} + +/** + * ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array + */ +#define ieee80211_set_sband_iftype_data(sband, iftd) \ + _ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd)) + +/** + * for_each_sband_iftype_data - iterate sband iftype data entries + * @sband: the sband whose iftype_data array to iterate + * @i: iterator counter + * @iftd: iftype data pointer to set + */ +#define for_each_sband_iftype_data(sband, i, iftd) \ + for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i]; \ + i < (sband)->n_iftype_data; \ + i++, iftd = (const void __force *)&(sband)->iftype_data[i]) + /** * ieee80211_get_sband_iftype_data - return sband data for a given iftype * @sband: the sband to search for the STA on @@ -557,6 +608,7 @@ static inline const struct ieee80211_sband_iftype_data * ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, u8 iftype) { + const struct ieee80211_sband_iftype_data *data; int i; if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) @@ -565,10 +617,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, if (iftype == NL80211_IFTYPE_AP_VLAN) iftype = NL80211_IFTYPE_AP; - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *data = - &sband->iftype_data[i]; - + for_each_sband_iftype_data(sband, i, data) { if (data->types_mask & BIT(iftype)) return data; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index eabf6c1bf3ff..bf8f72c412ee 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1066,6 +1066,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_he = false; supp_eht = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { + const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; @@ -1112,11 +1113,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *iftd; - - iftd = &sband->iftype_data[i]; - + for_each_sband_iftype_data(sband, i, iftd) { supp_he = supp_he || iftd->he_cap.has_he; supp_eht = supp_eht || iftd->eht_cap.has_eht; } diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 2af3aaee7493..842190dfa100 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2022 Intel Corporation + * Copyright 2018-2023 Intel Corporation */ #include @@ -1162,8 +1162,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (!sband) return false; - for (i = 0; i < sband->n_iftype_data; i++) { - iftd = &sband->iftype_data[i]; + for_each_sband_iftype_data(sband, i, iftd) { if (!iftd->eht_cap.has_eht) continue; diff --git a/net/wireless/core.c b/net/wireless/core.c index c419177278da..b0f6ae3ce78f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel 
Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -821,6 +821,7 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check supported bands/channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { + const struct ieee80211_sband_iftype_data *iftd; u16 types = 0; bool have_he = false; @@ -877,14 +878,11 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *iftd; + for_each_sband_iftype_data(sband, i, iftd) { bool has_ap, has_non_ap; u32 ap_bits = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_P2P_GO); - iftd = &sband->iftype_data[i]; - if (WARN_ON(!iftd->types_mask)) return -EINVAL; if (WARN_ON(types & iftd->types_mask)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 71a0a6e34bdb..ab0aea7dca7d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1913,20 +1913,20 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, struct nlattr *nl_iftype_data = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_IFTYPE_DATA); + const struct ieee80211_sband_iftype_data *iftd; int err; if (!nl_iftype_data) return -ENOBUFS; - for (i = 0; i < sband->n_iftype_data; i++) { + for_each_sband_iftype_data(sband, i, iftd) { struct nlattr *iftdata; iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; - err = nl80211_send_iftype_data(msg, sband, - &sband->iftype_data[i]); + err = nl80211_send_iftype_data(msg, sband, iftd); if (err) return err; -- cgit v1.2.3 From 5ea82df1f50e42416d0a8a7c42d37cc1df1545fe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Sep 2023 11:57:52 +0200 Subject: wifi: mac80211: fix RCU usage warning in mesh fast-xmit In mesh_fast_tx_flush_addr() we already hold the lock, so don't need additional hashtable RCU protection. Use the rhashtable_lookup_fast() variant to avoid RCU protection warnings. Fixes: d5edb9ae8d56 ("wifi: mac80211: mesh fast xmit support") Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index d32e304eeb4b..3e52aaa57b1f 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -648,7 +648,7 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, cache = &sdata->u.mesh.tx_cache; spin_lock_bh(&cache->walk_lock); - entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); -- cgit v1.2.3 From a469a5938d1fd98e50119893f22541fe6e269f02 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 28 Aug 2023 13:04:10 +0300 Subject: wifi: mac80211: add support for mld in ieee80211_chswitch_done This allows to finalize the CSA per link. In case the switch didn't work, tear down the MLD connection. Also pass the ieee80211_bss_conf to post_channel_switch to let the driver know which link completed the switch. 
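For illustration only (this is not part of the patch itself), a driver built against the reworked API would implement and use these entry points roughly as follows; the my_* names are hypothetical, only the post_channel_switch op signature and ieee80211_chswitch_done() come from the change below:

#include <net/mac80211.h>

/*
 * Hypothetical driver callback: link_conf identifies which link finished
 * switching, so a multi-link (MLD) driver can finalize only that link.
 */
static int my_post_channel_switch(struct ieee80211_hw *hw,
				  struct ieee80211_vif *vif,
				  struct ieee80211_bss_conf *link_conf)
{
	/* program the new channel for link_conf->link_id in the device */
	return 0;
}

/*
 * Hypothetical asynchronous completion path: report success or failure
 * per link; on failure mac80211 tears down the MLD connection.
 */
static void my_csa_complete(struct ieee80211_vif *vif,
			    struct ieee80211_bss_conf *link_conf, bool ok)
{
	ieee80211_chswitch_done(vif, ok, link_conf->link_id);
}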
Signed-off-by: Emmanuel Grumbach Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230828130311.3d3eacc88436.Ic2d14e2285aa1646216a56806cfd4a8d0054437c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/common.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c | 6 ++-- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 ++++--- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +- .../net/wireless/intel/iwlwifi/mvm/time-event.c | 2 +- drivers/net/wireless/ti/wlcore/event.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 6 ++-- include/net/mac80211.h | 8 +++-- net/mac80211/cfg.c | 35 ++++++++++++---------- net/mac80211/driver-ops.h | 6 ++-- net/mac80211/mlme.c | 27 +++++++++++------ net/mac80211/trace.h | 11 ++++--- 14 files changed, 73 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 0a4aa3c678c1..69276266ce6f 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -6122,7 +6122,7 @@ il4965_mac_channel_switch(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (il->ops->set_channel_switch(il, ch_switch)) { clear_bit(S_CHANNEL_SWITCH_PENDING, &il->status); il->switch_channel = 0; - ieee80211_chswitch_done(il->vif, false); + ieee80211_chswitch_done(il->vif, false, 0); } out: diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 96002121bb8b..054fef680aba 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -4090,7 +4090,7 @@ il_chswitch_done(struct il_priv *il, bool is_success) return; if (test_and_clear_bit(S_CHANNEL_SWITCH_PENDING, &il->status)) - ieee80211_chswitch_done(il->vif, is_success); + ieee80211_chswitch_done(il->vif, is_success, 0); } EXPORT_SYMBOL(il_chswitch_done); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index b1939ff275b5..5f3d5b15f727 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -2,7 +2,7 @@ /****************************************************************************** * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. - * Copyright (C) 2018 - 2019, 2022 Intel Corporation + * Copyright(C) 2018 - 2019, 2022 - 2023 Intel Corporation * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. 
@@ -1001,7 +1001,7 @@ static void iwlagn_mac_channel_switch(struct ieee80211_hw *hw, if (priv->lib->set_channel_switch(priv, ch_switch)) { clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status); priv->switch_channel = 0; - ieee80211_chswitch_done(ctx->vif, false); + ieee80211_chswitch_done(ctx->vif, false, 0); } out: @@ -1024,7 +1024,7 @@ void iwl_chswitch_done(struct iwl_priv *priv, bool is_success) return; if (ctx->vif) - ieee80211_chswitch_done(ctx->vif, is_success); + ieee80211_chswitch_done(ctx->vif, is_success, 0); } static void iwlagn_configure_filter(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 7369a45f7f2b..b28d998c65c5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1839,7 +1839,7 @@ void iwl_mvm_channel_switch_start_notif(struct iwl_mvm *mvm, iwl_mvm_csa_client_absent(mvm, vif); cancel_delayed_work(&mvmvif->csa_work); - ieee80211_chswitch_done(vif, true); + ieee80211_chswitch_done(vif, true, 0); break; default: /* should never happen */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 5918c1f2b10c..921f72dcddac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1370,7 +1370,8 @@ int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); @@ -1452,7 +1453,8 @@ void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw, mvmvif->csa_failed = true; mutex_unlock(&mvm->mutex); - iwl_mvm_post_channel_switch(hw, vif); + /* If we're here, we can't support MLD */ + iwl_mvm_post_channel_switch(hw, vif, &vif->bss_conf); } void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk) @@ -1464,7 +1466,7 @@ void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk) vif = container_of((void *)mvmvif, struct ieee80211_vif, drv_priv); /* Trigger disconnect (should clear the CSA state) */ - ieee80211_chswitch_done(vif, false); + ieee80211_chswitch_done(vif, false, 0); } static u8 @@ -5535,7 +5537,7 @@ void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw, if (mvmvif->csa_misbehave) { /* Second time, give up on this AP*/ iwl_mvm_abort_channel_switch(hw, vif); - ieee80211_chswitch_done(vif, false); + ieee80211_chswitch_done(vif, false, 0); mvmvif->csa_misbehave = false; return; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index b18c91c5dd5d..dda13f4351c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2427,7 +2427,8 @@ static inline u8 iwl_mvm_phy_band_from_nl80211(enum nl80211_band band) /* Channel Switch */ void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk); int iwl_mvm_post_channel_switch(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link); /* Channel Context */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 5f0e7144a951..e1f6cea649c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ 
b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -223,7 +223,7 @@ iwl_mvm_te_handle_notify_csa(struct iwl_mvm *mvm, } iwl_mvm_csa_client_absent(mvm, te_data->vif); cancel_delayed_work(&mvmvif->csa_work); - ieee80211_chswitch_done(te_data->vif, true); + ieee80211_chswitch_done(te_data->vif, true, 0); break; default: /* should never happen */ diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c index 46ab69eab26a..1e082d039b82 100644 --- a/drivers/net/wireless/ti/wlcore/event.c +++ b/drivers/net/wireless/ti/wlcore/event.c @@ -229,7 +229,7 @@ void wlcore_event_channel_switch(struct wl1271 *wl, vif = wl12xx_wlvif_to_vif(wlvif); if (wlvif->bss_type == BSS_TYPE_STA_BSS) { - ieee80211_chswitch_done(vif, success); + ieee80211_chswitch_done(vif, success, 0); cancel_delayed_work(&wlvif->channel_switch_work); } else { set_bit(WLVIF_FLAG_BEACON_DISABLED, &wlvif->flags); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index bf21611872a3..b7e68d2721c1 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2043,7 +2043,7 @@ static void wlcore_channel_switch_work(struct work_struct *work) goto out; vif = wl12xx_wlvif_to_vif(wlvif); - ieee80211_chswitch_done(vif, false); + ieee80211_chswitch_done(vif, false, 0); ret = pm_runtime_resume_and_get(wl->dev); if (ret < 0) @@ -3030,7 +3030,7 @@ static int wlcore_unset_assoc(struct wl1271 *wl, struct wl12xx_vif *wlvif) struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif); wl12xx_cmd_stop_channel_switch(wl, wlvif); - ieee80211_chswitch_done(vif, false); + ieee80211_chswitch_done(vif, false, 0); cancel_delayed_work(&wlvif->channel_switch_work); } @@ -5451,7 +5451,7 @@ static void wl12xx_op_channel_switch(struct ieee80211_hw *hw, if (unlikely(wl->state == WLCORE_STATE_OFF)) { if (test_bit(WLVIF_FLAG_STA_ASSOCIATED, &wlvif->flags)) - ieee80211_chswitch_done(vif, false); + ieee80211_chswitch_done(vif, false, 0); goto out; } else if (unlikely(wl->state != WLCORE_STATE_ON)) { goto out; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 154592ce48e5..09fe4601bf59 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4539,7 +4539,8 @@ struct ieee80211_ops { struct ieee80211_channel_switch *ch_switch); int (*post_channel_switch)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void (*abort_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, @@ -6537,11 +6538,14 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); * ieee80211_chswitch_done - Complete channel switch process * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @success: make the channel switch successful or not + * @link_id: the link_id on which the switch was done. Ignored if success is + * false. * * Complete the channel switch post-process: set the new operational channel * and wake up the suspended queues. 
*/ -void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, + unsigned int link_id); /** * ieee80211_channel_switch_disconnect - disconnect due to channel switch error diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 851d6ed68367..490ee6f52d6e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3590,8 +3590,9 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } -static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; @@ -3605,20 +3606,20 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) * completed successfully */ - if (sdata->deflink.reserved_chanctx) { + if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their * reservations */ - if (sdata->deflink.reserved_ready) + if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(&sdata->deflink); } if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, - &sdata->deflink.csa_chandef)) + &link_data->csa_chandef)) return -EINVAL; sdata->vif.bss_conf.csa_active = false; @@ -3635,25 +3636,27 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); - if (sdata->deflink.csa_block_tx) { + if (link_data->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->deflink.csa_block_tx = false; + link_data->csa_block_tx = false; } - err = drv_post_channel_switch(sdata); + err = drv_post_channel_switch(link_data); if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0, + cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef, 0, sdata->vif.bss_conf.eht_puncturing); return 0; } -static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { - if (__ieee80211_csa_finalize(sdata)) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; + + if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); @@ -3662,21 +3665,21 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, - deflink.csa_finalize_work); + struct ieee80211_link_data *link = + container_of(work, struct ieee80211_link_data, csa_finalize_work); + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ - if (!sdata->vif.bss_conf.csa_active) + if (!link->conf->csa_active) return; if (!ieee80211_sdata_running(sdata)) return; - ieee80211_csa_finalize(sdata); + ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, @@ -3919,7 +3922,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, drv_channel_switch_beacon(sdata, &params->chandef); } else { /* if the beacon didn't change, we can finalize immediately */ - ieee80211_csa_finalize(sdata); + ieee80211_csa_finalize(&sdata->deflink); } out: diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 554c7aa10cc2..77048b9065e6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1126,8 +1126,9 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, } static inline int -drv_post_channel_switch(struct ieee80211_sub_if_data *sdata) +drv_post_channel_switch(struct ieee80211_link_data *link) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; int ret = 0; @@ -1139,7 +1140,8 @@ drv_post_channel_switch(struct ieee80211_sub_if_data *sdata) trace_drv_post_channel_switch(local, sdata); if (local->ops->post_channel_switch) - ret = local->ops->post_channel_switch(&local->hw, &sdata->vif); + ret = local->ops->post_channel_switch(&local->hw, &sdata->vif, + link->conf); trace_drv_return_int(local, ret); return ret; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6d0a29749e8c..0f295c5403b3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1763,7 +1763,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) */ link->u.mgd.beacon_crc_valid = false; - ret = drv_post_channel_switch(sdata); + ret = drv_post_channel_switch(link); if (ret) { sdata_info(sdata, "driver post channel switch failed, disconnecting\n"); @@ -1775,25 +1775,34 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0, 0); } -void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, + unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) - success = false; + trace_api_chswitch_done(sdata, success, link_id); + + rcu_read_lock(); - trace_api_chswitch_done(sdata, success); if (!success) { sdata_info(sdata, "driver channel switch failed, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, - &ifmgd->csa_connection_drop_work); + &sdata->u.mgd.csa_connection_drop_work); } else { + struct ieee80211_link_data *link = + rcu_dereference(sdata->link[link_id]); + + if (WARN_ON(!link)) { + rcu_read_unlock(); + return; + } + wiphy_delayed_work_queue(sdata->local->hw.wiphy, - &sdata->deflink.u.mgd.chswitch_work, - 0); + &link->u.mgd.chswitch_work, 0); } + + rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_chswitch_done); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index b8c53b4a710b..032718d5b298 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2839,23 +2839,26 @@ TRACE_EVENT(api_sta_block_awake, ); TRACE_EVENT(api_chswitch_done, - TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success), + TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success, + unsigned int link_id), - TP_ARGS(sdata, success), + TP_ARGS(sdata, success, link_id), TP_STRUCT__entry( VIF_ENTRY
__field(bool, success) + __field(unsigned int, link_id) ), TP_fast_assign( VIF_ASSIGN; __entry->success = success; + __entry->link_id = link_id; ), TP_printk( - VIF_PR_FMT " success=%d", - VIF_PR_ARG, __entry->success + VIF_PR_FMT " success=%d link_id=%d", + VIF_PR_ARG, __entry->success, __entry->link_id ) ); -- cgit v1.2.3 From 43125539fc69c6aa63d34b516939431391bddeac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:19 +0300 Subject: wifi: cfg80211: fix off-by-one in element defrag If a fragment is the last element, it's erroneously not accepted. Fix that. Fixes: f837a653a097 ("wifi: cfg80211: add element defragmentation helper") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.adca9fbd3317.I6b2df45eb71513f3e48efd196ae3cddec362dc1c@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0cf1ce7b6934..19516073c6d5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2354,8 +2354,8 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, /* elem might be invalid after the memmove */ next = (void *)(elem->data + elem->datalen); - elem_datalen = elem->datalen; + if (elem->id == WLAN_EID_EXTENSION) { copied = elem->datalen - 1; if (copied > data_len) @@ -2376,7 +2376,7 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, for (elem = next; elem->data < ies + ieslen && - elem->data + elem->datalen < ies + ieslen; + elem->data + elem->datalen <= ies + ieslen; elem = next) { /* elem might be invalid after the memmove */ next = (void *)(elem->data + elem->datalen); -- cgit v1.2.3 From 730eeb17bbdd3c31f91e2a4fff35dd7e9c67d706 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:20 +0300 Subject: wifi: cfg80211: add first kunit tests, for element defrag Add a couple of tests for element defragmentation, to see that the function works correctly. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.e2a5cead1816.I09f0edc19d162b54ee330991c728c1e9aa42ebf6@changeid Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 11 +++ net/wireless/Makefile | 1 + net/wireless/tests/Makefile | 3 + net/wireless/tests/fragmentation.c | 157 +++++++++++++++++++++++++++++++++++++ net/wireless/tests/module.c | 10 +++ 5 files changed, 182 insertions(+) create mode 100644 net/wireless/tests/Makefile create mode 100644 net/wireless/tests/fragmentation.c create mode 100644 net/wireless/tests/module.c (limited to 'net') diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index f620acd2a0f5..a9ac85e09af3 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -201,6 +201,17 @@ config CFG80211_WEXT_EXPORT Drivers should select this option if they require cfg80211's wext compatibility symbols to be exported. +config CFG80211_KUNIT_TEST + tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on CFG80211 + default KUNIT_ALL_TESTS + depends on !KERNEL_6_2 + help + Enable this option to test cfg80211 functions with kunit. + + If unsure, say N. 
+ endif # CFG80211 config LIB80211 diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 527ae669f6f7..089c841528c8 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_LIB80211) += lib80211.o obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o +obj-y += tests/ obj-$(CONFIG_WEXT_CORE) += wext-core.o obj-$(CONFIG_WEXT_PROC) += wext-proc.o diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile new file mode 100644 index 000000000000..fa8e297bbc5e --- /dev/null +++ b/net/wireless/tests/Makefile @@ -0,0 +1,3 @@ +cfg80211-tests-y += module.o fragmentation.o + +obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c new file mode 100644 index 000000000000..49a339ca8880 --- /dev/null +++ b/net/wireless/tests/fragmentation.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for element fragmentation + * + * Copyright (C) 2023 Intel Corporation + */ +#include +#include +#include + +static void defragment_0(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 254, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [254 + 2] = WLAN_EID_FRAGMENT, + [254 + 3] = 7, + [254 + 3 + 7] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + + KUNIT_ASSERT_NOT_NULL(test, data); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 253); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253); +} + +static void defragment_1(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [255 + 2] = WLAN_EID_FRAGMENT, + [255 + 3] = 7, + [255 + 3 + 1] = 0xaa, + [255 + 3 + 8] = WLAN_EID_FRAGMENT, /* not used */ + [255 + 3 + 9] = 1, + [255 + 3 + 10] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + const struct element *elem; + int count = 0; + + KUNIT_ASSERT_NOT_NULL(test, data); + + for_each_element(elem, input, sizeof(input)) + count++; + + /* check the elements are right */ + KUNIT_ASSERT_EQ(test, count, 3); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + /* this means the last fragment was not used */ + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7); +} + +static void defragment_2(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + + [257 + 0] = WLAN_EID_FRAGMENT, + [257 + 1] = 255, + [257 + 20] = 0xaa, + + [2 * 257 + 0] = WLAN_EID_FRAGMENT, + [2 * 257 + 1] = 1, + [2 * 257 + 2] = 0xcc, + [2 * 257 + 3] = WLAN_EID_FRAGMENT, /* not used */ + [2 * 257 + 4] = 1, + [2 * 257 + 5] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + const struct element *elem; + int count = 0; + + KUNIT_ASSERT_NOT_NULL(test, data); + + for_each_element(elem, input, sizeof(input)) + count++; + + /* check the elements are right */ + KUNIT_ASSERT_EQ(test, count, 4); + + ret = 
cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + /* this means the last fragment was not used */ + KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255); + KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1); +} + +static void defragment_at_end(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [255 + 2] = WLAN_EID_FRAGMENT, + [255 + 3] = 7, + [255 + 3 + 7] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + + KUNIT_ASSERT_NOT_NULL(test, data); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7); +} + +static struct kunit_case element_fragmentation_test_cases[] = { + KUNIT_CASE(defragment_0), + KUNIT_CASE(defragment_1), + KUNIT_CASE(defragment_2), + KUNIT_CASE(defragment_at_end), + {} +}; + +static struct kunit_suite element_fragmentation = { + .name = "cfg80211-element-defragmentation", + .test_cases = element_fragmentation_test_cases, +}; + +kunit_test_suite(element_fragmentation); diff --git a/net/wireless/tests/module.c b/net/wireless/tests/module.c new file mode 100644 index 000000000000..9ff7b2c12312 --- /dev/null +++ b/net/wireless/tests/module.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is just module boilerplate for the cfg80211 kunit module. + * + * Copyright (C) 2023 Intel Corporation + */ +#include + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("tests for cfg80211"); -- cgit v1.2.3 From ffbd0c8c1e7f86408919d023cf87119b9b7385d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:21 +0300 Subject: wifi: mac80211: add an element parsing unit test Add a unit test for the parsing of a fragmented sta profile sub-element inside a fragmented multi-link element. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.333bc75df13f.I0ddfeb6a88a4d89e7c7850e8ef45a4b19b5a061a@changeid Signed-off-by: Johannes Berg --- net/mac80211/Kconfig | 11 +++++ net/mac80211/Makefile | 2 + net/mac80211/tests/Makefile | 3 ++ net/mac80211/tests/elems.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/tests/module.c | 10 +++++ net/mac80211/util.c | 3 ++ 6 files changed, 130 insertions(+) create mode 100644 net/mac80211/tests/Makefile create mode 100644 net/mac80211/tests/elems.c create mode 100644 net/mac80211/tests/module.c (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 51ec8256b7fa..037ab74f5ade 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -57,6 +57,17 @@ endif comment "Some wireless drivers require a rate control algorithm" depends on MAC80211 && MAC80211_HAS_RC=n +config MAC80211_KUNIT_TEST + tristate "KUnit tests for mac80211" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on MAC80211 + default KUNIT_ALL_TESTS + depends on !KERNEL_6_2 + help + Enable this option to test mac80211 internals with kunit. + + If unsure, say N. 
+ config MAC80211_MESH bool "Enable mac80211 mesh networking support" depends on MAC80211 diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index b8de44da1fb8..c9eb52768133 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \ mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +obj-y += tests/ + ccflags-y += -DDEBUG diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile new file mode 100644 index 000000000000..4814584f8a14 --- /dev/null +++ b/net/mac80211/tests/Makefile @@ -0,0 +1,3 @@ +mac80211-tests-y += module.o elems.o + +obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c new file mode 100644 index 000000000000..997d0cd27b2d --- /dev/null +++ b/net/mac80211/tests/elems.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for element parsing + * + * Copyright (C) 2023 Intel Corporation + */ +#include +#include "../ieee80211_i.h" + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static void mle_defrag(struct kunit *test) +{ + struct ieee80211_elems_parse_params parse_params = { + .link_id = 12, + .from_ap = true, + }; + struct ieee802_11_elems *parsed; + struct sk_buff *skb; + u8 *len_mle, *len_prof; + int i; + + skb = alloc_skb(1024, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, skb); + + if (skb_pad(skb, skb_tailroom(skb))) { + KUNIT_FAIL(test, "failed to pad skb"); + return; + } + + /* build a multi-link element */ + skb_put_u8(skb, WLAN_EID_EXTENSION); + len_mle = skb_put(skb, 1); + skb_put_u8(skb, WLAN_EID_EXT_EHT_MULTI_LINK); + + put_unaligned_le16(IEEE80211_ML_CONTROL_TYPE_BASIC, + skb_put(skb, 2)); + /* struct ieee80211_mle_basic_common_info */ + skb_put_u8(skb, 7); /* includes len field */ + skb_put_data(skb, "\x00\x00\x00\x00\x00\x00", ETH_ALEN); /* MLD addr */ + + /* with a STA profile inside */ + skb_put_u8(skb, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE); + len_prof = skb_put(skb, 1); + put_unaligned_le16(IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE | + parse_params.link_id, + skb_put(skb, 2)); + skb_put_u8(skb, 1); /* fake sta_info_len - includes itself */ + /* put a bunch of useless elements into it */ + for (i = 0; i < 20; i++) { + skb_put_u8(skb, WLAN_EID_SSID); + skb_put_u8(skb, 20); + skb_put(skb, 20); + } + + /* fragment STA profile */ + ieee80211_fragment_element(skb, len_prof, + IEEE80211_MLE_SUBELEM_FRAGMENT); + /* fragment MLE */ + ieee80211_fragment_element(skb, len_mle, WLAN_EID_FRAGMENT); + + parse_params.start = skb->data; + parse_params.len = skb->len; + parsed = ieee802_11_parse_elems_full(&parse_params); + /* should return ERR_PTR or valid, not NULL */ + KUNIT_EXPECT_NOT_NULL(test, parsed); + + if (IS_ERR_OR_NULL(parsed)) + goto free_skb; + + KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem); + KUNIT_EXPECT_EQ(test, + parsed->ml_basic_len, + 2 /* control */ + + 7 /* common info */ + + 2 /* sta profile element header */ + + 3 /* sta profile header */ + + 20 * 22 /* sta profile data */ + + 2 /* sta profile fragment element */); + KUNIT_EXPECT_NOT_NULL(test, parsed->prof); + KUNIT_EXPECT_EQ(test, + parsed->sta_prof_len, + 3 /* sta profile header */ + + 20 * 22 /* sta profile data */); + + kfree(parsed); +free_skb: + kfree_skb(skb); +} + +static struct kunit_case element_parsing_test_cases[] = { + KUNIT_CASE(mle_defrag), + {} +}; + +static struct kunit_suite element_parsing = { + .name = "mac80211-element-parsing", + .test_cases = 
element_parsing_test_cases, +}; + +kunit_test_suite(element_parsing); diff --git a/net/mac80211/tests/module.c b/net/mac80211/tests/module.c new file mode 100644 index 000000000000..9d05f2943935 --- /dev/null +++ b/net/mac80211/tests/module.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is just module boilerplate for the mac80211 kunit module. + * + * Copyright (C) 2023 Intel Corporation + */ +#include + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("tests for mac80211"); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4aefb9483aa9..b0232a2b963e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ieee80211_i.h" #include "driver-ops.h" @@ -1654,6 +1655,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) return elems; } +EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params @@ -5127,3 +5129,4 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) *len_pos = elem_len; } +EXPORT_SYMBOL_IF_KUNIT(ieee80211_fragment_element); -- cgit v1.2.3 From 5806ef25bc6e6cf0c04005ff25a4585437d567de Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 27 Aug 2023 14:05:22 +0300 Subject: wifi: cfg80211: add ieee80211_fragment_element to public API This function will be used by the kunit tests within cfg80211. As it is generally useful, move it from mac80211 to cfg80211. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.5af9391659f5.Ie534ed6591ba02be8572d4d7242394f29e3af04b@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ net/mac80211/ieee80211_i.h | 2 -- net/mac80211/util.c | 29 ----------------------------- net/wireless/util.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 922fd9e0d9b4..f22b22d7d4e8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8873,6 +8873,18 @@ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); } +/** + * ieee80211_fragment_element - fragment the last element in skb + * @skb: The skbuf that the element was added to + * @len_pos: Pointer to length of the element to fragment + * @frag_id: The element ID to use for fragments + * + * This function fragments all data after @len_pos, adding fragmentation + * elements with the given ID as appropriate. The SKB will grow in size + * accordingly. 
+ */ +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); + /** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN * @wdev: the wireless device reporting the wakeup diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b8465d205076..a6fd87f0c035 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2308,8 +2308,6 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action, return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); - extern const int ieee802_1d_to_ac[8]; static inline int ieee80211_ac_from_tid(int tid) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b0232a2b963e..ed113028794a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5101,32 +5101,3 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } - -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) -{ - unsigned int elem_len; - - if (!len_pos) - return; - - elem_len = skb->data + skb->len - len_pos - 1; - - while (elem_len > 255) { - /* this one is 255 */ - *len_pos = 255; - /* remaining data gets smaller */ - elem_len -= 255; - /* make space for the fragment ID/len in SKB */ - skb_put(skb, 2); - /* shift back the remaining data to place fragment ID/len */ - memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); - /* place the fragment ID */ - len_pos += 255 + 1; - *len_pos = frag_id; - /* and point to fragment length to update later */ - len_pos++; - } - - *len_pos = elem_len; -} -EXPORT_SYMBOL_IF_KUNIT(ieee80211_fragment_element); diff --git a/net/wireless/util.c b/net/wireless/util.c index fff99fe43fdd..56cbd9979a3f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1966,6 +1966,35 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, } EXPORT_SYMBOL(ieee80211_ie_split_ric); +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) +{ + unsigned int elem_len; + + if (!len_pos) + return; + + elem_len = skb->data + skb->len - len_pos - 1; + + while (elem_len > 255) { + /* this one is 255 */ + *len_pos = 255; + /* remaining data gets smaller */ + elem_len -= 255; + /* make space for the fragment ID/len in SKB */ + skb_put(skb, 2); + /* shift back the remaining data to place fragment ID/len */ + memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); + /* place the fragment ID */ + len_pos += 255 + 1; + *len_pos = frag_id; + /* and point to fragment length to update later */ + len_pos++; + } + + *len_pos = elem_len; +} +EXPORT_SYMBOL(ieee80211_fragment_element); + bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band) { -- cgit v1.2.3 From 79aa3a09a7ff03c66d3f35a157ead0abb7e49812 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 27 Aug 2023 14:05:23 +0300 Subject: wifi: mac80211: add more warnings about inserting sta info The sta info needs to be inserted before its links may be modified. Add a few warnings to prevent accidental usage of these functions. 
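For clarity (not part of the patch), the ordering that these warnings enforce for mac80211-internal callers looks, in simplified form, like this; the example_ name is invented and the real callers (e.g. in net/mac80211/cfg.c) carry full error handling and locking:

/* Simplified sketch of the required order for an MLD station entry. */
static int example_add_link_sta(struct sta_info *sta, unsigned int link_id)
{
	int ret;

	/* the station must be inserted first (sets WLAN_STA_INSERTED) ... */
	ret = sta_info_insert(sta);
	if (ret)
		return ret;

	/* ... only afterwards may links be allocated and activated */
	ret = ieee80211_sta_allocate_link(sta, link_id);
	if (ret)
		return ret;

	return ieee80211_sta_activate_link(sta, link_id);
}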
Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.eeb43b3cc9e3.I5fd8236f70e64bf6268f33c883f7a878d963b83e@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2a61269a4b54..0c59ab9f9aaa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2866,6 +2866,8 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) lockdep_assert_wiphy(sdata->local->hw.wiphy); + WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); + /* must represent an MLD from the start */ if (WARN_ON(!sta->sta.valid_links)) return -EINVAL; @@ -2895,6 +2897,8 @@ void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id) { lockdep_assert_wiphy(sta->sdata->local->hw.wiphy); + WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); + sta_remove_link(sta, link_id, false); } @@ -2922,7 +2926,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) sta->sta.valid_links = new_links; - if (!test_sta_flag(sta, WLAN_STA_INSERTED)) + if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) goto hash; ieee80211_recalc_min_chandef(sdata, link_id); @@ -2955,7 +2959,7 @@ void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id) sta->sta.valid_links &= ~BIT(link_id); - if (test_sta_flag(sta, WLAN_STA_INSERTED)) + if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, sta->sta.valid_links); -- cgit v1.2.3 From fe5cb719e78d4507afbada6ccf9730447002d6e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:24 +0300 Subject: wifi: mac80211: remove unnecessary struct forward declaration This just causes kernel-doc to complain at this spot, but isn't actually needed anyway, so remove it. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.33a5591dfdeb.If4e7e1a1cb4c04f0afd83db7401c780404dca699@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a6fd87f0c035..8fdd6905f65d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -625,7 +625,6 @@ struct ieee80211_if_ocb { * vendor-specific mesh synchronization * */ -struct ieee802_11_elems; struct ieee80211_mesh_sync_ops { void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, unsigned int len, -- cgit v1.2.3 From 799f53e223cb1bd2155d6aaaf3c3d3955440abfb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:25 +0300 Subject: wifi: mac80211: fix various kernel-doc issues There are various kernel-doc issues here, fix them. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.7ce9761f9ebb.I0f44e76c518f72135cc855c809bfa7a5e977b894@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 18 +++++++++++------- net/mac80211/sta_info.h | 4 +--- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8fdd6905f65d..e7dc4cdcdcde 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -624,6 +624,8 @@ struct ieee80211_if_ocb { * these declarations define the interface, which enables * vendor-specific mesh synchronization * + * @rx_bcn_presp: beacon/probe response was received + * @adjust_tsf: TSF adjustment method */ struct ieee80211_mesh_sync_ops { void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, @@ -864,12 +866,13 @@ enum txq_info_flags { * struct txq_info - per tid queue * * @tin: contains packets split into multiple flows - * @def_flow: used as a fallback flow when a packet destined to @tin hashes to - * a fq_flow which is already owned by a different tin - * @def_cvars: codel vars for @def_flow + * @def_cvars: codel vars for the @tin's default_flow + * @cstats: code statistics for this queue * @frags: used to keep fragments created after dequeue * @schedule_order: used with ieee80211_local->active_txqs * @schedule_round: counter to prevent infinite loops on TXQ scheduling + * @flags: TXQ flags from &enum txq_info_flags + * @txq: the driver visible part */ struct txq_info { struct fq_tin tin; @@ -898,7 +901,8 @@ struct ieee80211_if_mntr { * struct ieee80211_if_nan - NAN state * * @conf: current NAN configuration - * @func_ids: a bitmap of available instance_id's + * @func_lock: lock for @func_inst_ids + * @function_inst_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; @@ -1239,7 +1243,7 @@ struct tpt_led_trigger { #endif /** - * mac80211 scan flags - currently active scan mode + * enum mac80211_scan_flags - currently active scan mode * * @SCAN_SW_SCANNING: We're currently in the process of scanning but may as * well be on the operating channel @@ -1257,7 +1261,7 @@ struct tpt_led_trigger { * and could send a probe request after receiving a beacon. * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ -enum { +enum mac80211_scan_flags { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_ONCHANNEL_SCANNING, @@ -2179,7 +2183,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * flags from &enum ieee80211_conn_flags. * @bssid: the currently connected bssid (for reporting) * @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl. - All of them will be filled with if success only. + * All of them will be filled with if success only. * Return: 0 on success, <0 on error and >0 if there is nothing to parse. */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index aaf45d8523a1..7acf2223e47a 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. 
* Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020-2022 Intel Corporation + * Copyright(c) 2020-2023 Intel Corporation */ #ifndef STA_INFO_H @@ -615,8 +615,6 @@ struct link_sta_info { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @rcu_head: RCU head used for freeing this station struct - * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, - * taken from HT/VHT capabilities or VHT operating mode notification * @cparams: CoDel parameters for this station. * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: -- cgit v1.2.3 From 2a53743989868016ac42e578b1ac959014b6b17b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:26 +0300 Subject: wifi: cfg80211: reg: fix various kernel-doc issues Clean up the kernel-doc comments in reg.h. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.36d7b52da0f5.I85fbfb3095613f4a0512493cbbdda881dc31be2c@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.h b/net/wireless/reg.h index f3707f729024..a703e53c23ee 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -5,7 +5,7 @@ /* * Copyright 2008-2011 Luis R. Rodriguez - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2023 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -133,7 +133,7 @@ void regulatory_hint_disconnect(void); /** * cfg80211_get_unii - get the U-NII band for the frequency * @freq: the frequency for which we want to get the UNII band. - + * * Get a value specifying the U-NII band frequency belongs to. * U-NII bands are defined by the FCC in C.F.R 47 part 15. * @@ -156,11 +156,11 @@ bool regulatory_indoor_allowed(void); /** * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys - * @wiphy - wiphy on which radar is detected and the event will be propagated + * @wiphy: wiphy on which radar is detected and the event will be propagated * to other available wiphys having the same DFS domain - * @chandef - Channel definition of radar detected channel - * @dfs_state - DFS channel state to be set - * @event - Type of radar event which triggered this DFS state change + * @chandef: Channel definition of radar detected channel + * @dfs_state: DFS channel state to be set + * @event: Type of radar event which triggered this DFS state change * * This function should be called with rtnl lock held. 
*/ @@ -171,8 +171,8 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, /** * reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured - * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2 - * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 + * @wiphy1: wiphy it's dfs_region to be checked against that of wiphy2 + * @wiphy2: wiphy it's dfs_region to be checked against that of wiphy1 */ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); -- cgit v1.2.3 From 428e8976a15f849ad92b1c1e38dda2a684350ff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:28 +0300 Subject: wifi: mac80211: fix # of MSDU in A-MSDU calculation During my refactoring I wanted to get rid of the switch, but replaced it with the wrong calculation. Fix that. Fixes: 175ad2ec89fe ("wifi: mac80211: limit A-MSDU subframes for client too") Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.51bf1b8b0adb.Iffbd337fdad2b86ae12f5a39c69fb82b517f7486@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0c59ab9f9aaa..7243c6aa2161 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2986,7 +2986,7 @@ void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1; if (val) - sta->sta.max_amsdu_subframes = 4 << val; + sta->sta.max_amsdu_subframes = 4 << (4 - val); } #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 0f99f0878350f907b31c1ea091c807faa566dded Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 27 Aug 2023 14:05:29 +0300 Subject: wifi: mac80211: Print local link address during authentication To ease debugging, mostly in cases that authentication fails. 
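Before moving on, the A-MSDU subframe fix above (commit 428e8976a15f) is easier to follow with the numbers written out. The decoded extended-capabilities field "val" is two bits wide and the value 0 is skipped by the "if (val)" check; for the remaining values, the stand-alone snippet below simply evaluates the old and the corrected formula:

#include <stdio.h>

int main(void)
{
	/* val is the decoded "max MSDUs in A-MSDU" field; 0 means no
	 * limit is applied and is filtered out before the shift */
	for (unsigned int val = 1; val <= 3; val++)
		printf("val=%u: old 4<<val = %2u, fixed 4<<(4-val) = %2u\n",
		       val, 4u << val, 4u << (4 - val));
	return 0;
}

The old formula produced 8/16/32 subframes for val 1/2/3, the corrected one produces 32/16/8, so the bug was simply a reversed mapping between the advertised field and the allowed subframe count.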
Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.9c08605e2691.I0032e9d6e01325862189e4a20b02ddbe8f2f5e75@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0f295c5403b3..69f236b8bb45 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7018,6 +7018,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data; + struct ieee80211_link_data *link; u16 auth_alg; int err; bool cont_auth; @@ -7143,8 +7144,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, false); } - sdata_info(sdata, "authenticate with %pM\n", auth_data->ap_addr); - /* needed for transmitting the auth frame(s) properly */ memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN); @@ -7153,6 +7152,19 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; + if (req->link_id > 0) + link = sdata_dereference(sdata->link[req->link_id], sdata); + else + link = sdata_dereference(sdata->link[0], sdata); + + if (WARN_ON(!link)) { + err = -ENOLINK; + goto err_clear; + } + + sdata_info(sdata, "authenticate with %pM (local address=%pM)\n", + auth_data->ap_addr, link->conf->addr); + err = ieee80211_auth(sdata); if (err) { sta_info_destroy_addr(sdata, auth_data->ap_addr); -- cgit v1.2.3 From 90668e3204f197f64ac7eb60650c22fb6ec8db6e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:30 +0300 Subject: wifi: mac80211: take MBSSID/EHT data also from probe resp The code that sets up the assoc link will currently take the BSS element data from the beacon only. This is correct for some of the data, notably the timing and the "have_beacon", but all the data about MBSSID and EHT really doesn't need to be taken from there, and if the EHT puncturing is misconfigured on the AP but we didn't receive a beacon yet, this causes us to connect but immediately disconnect upon receiving the first beacon, rather than connecting without EHT in the first place. Change the code to take MBSSID and EHT data also from the probe response, for a better picture of what the BSS capabilities are and to avoid that EHT puncturing problem. 
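One parsing detail in the diff below deserves a concrete illustration: an element is only trusted if its advertised length covers the byte being read, hence the "elem->datalen >= 3" check before "elem->data[2]" is consumed as the profile periodicity. A small user-space sketch of that pattern, using a made-up element layout and made-up bytes rather than the real cfg80211/ieee80211 definitions:

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for an information element: id, length, payload */
struct element {
	uint8_t id;
	uint8_t datalen;
	uint8_t data[];
};

int main(void)
{
	/* fake extension element: data[0] extension id, data[1] max BSSID
	 * indicator, data[2] profile periodicity */
	static const uint8_t buf[] = { 0xff, 3, 0x37, 0x02, 0x05 };
	const struct element *elem = (const void *)buf;
	unsigned int profile_periodicity = 0;

	/* mirror the length check: only read data[2] if datalen covers it */
	if (elem && elem->datalen >= 3)
		profile_periodicity = elem->data[2];

	printf("profile periodicity: %u\n", profile_periodicity);
	return 0;
}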
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.3c7e52d49482.Iba6b672f6dc74b45bba26bc497e953e27da43ef9@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 69f236b8bb45..f7dd25dc766c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7195,7 +7195,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, unsigned int link_id) { struct ieee80211_local *local = sdata->local; - const struct cfg80211_bss_ies *beacon_ies; + const struct cfg80211_bss_ies *bss_ies; struct ieee80211_supported_band *sband; const struct element *ht_elem, *vht_elem; struct ieee80211_link_data *link; @@ -7270,32 +7270,37 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, link->conf->eht_puncturing = 0; rcu_read_lock(); - beacon_ies = rcu_dereference(cbss->beacon_ies); - if (beacon_ies) { - const struct ieee80211_eht_operation *eht_oper; - const struct element *elem; + bss_ies = rcu_dereference(cbss->beacon_ies); + if (bss_ies) { u8 dtim_count = 0; - ieee80211_get_dtim(beacon_ies, &dtim_count, + ieee80211_get_dtim(bss_ies, &dtim_count, &link->u.mgd.dtim_period); sdata->deflink.u.mgd.have_beacon = true; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { - link->conf->sync_tsf = beacon_ies->tsf; + link->conf->sync_tsf = bss_ies->tsf; link->conf->sync_device_ts = bss->device_ts_beacon; link->conf->sync_dtim_count = dtim_count; } + } else { + bss_ies = rcu_dereference(cbss->ies); + } + + if (bss_ies) { + const struct ieee80211_eht_operation *eht_oper; + const struct element *elem; elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); if (elem && elem->datalen >= 3) link->conf->profile_periodicity = elem->data[2]; else link->conf->profile_periodicity = 0; elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); if (elem && elem->datalen >= 11 && (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) link->conf->ema_ap = true; @@ -7303,7 +7308,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, link->conf->ema_ap = false; elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); eht_oper = (const void *)(elem->data + 1); if (elem && -- cgit v1.2.3 From 563fe446ef2b30d0eb918a46070cfc7fb41290a7 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 27 Aug 2023 14:05:31 +0300 Subject: wifi: mac80211: Do not force off-channel for management Tx with MLO When user space transmits a management frame it is expected to use the MLD addresses if the connection is an MLD one. Thus, in case the management Tx is using the MLD address and no channel is configured off-channel should not be used (as one of the active links would be used). 
Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.73c8efce252f.Ie4b0a842debb24ef25c5e6cb2ad69b9f46bc4b2a@changeid Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index be377ed12baa..071582dbe6a5 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -829,8 +829,13 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (!sdata->u.mgd.associated || (params->offchan && params->wait && local->ops->remain_on_channel && - memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) + memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) { need_offchan = true; + } else if (sdata->u.mgd.associated && + ether_addr_equal(sdata->vif.cfg.ap_addr, mgmt->da)) { + sta = sta_info_get_bss(sdata, mgmt->da); + mlo_sta = sta && sta->sta.mlo; + } break; case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; -- cgit v1.2.3 From 86a8db67a1330c203ae54cf25f1af08616e2e3c2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 27 Aug 2023 14:05:32 +0300 Subject: wifi: mac80211: fix channel switch link data Use the correct link ID and per-link puncturing data instead of hardcoding link ID 0 and using deflink puncturing. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230827135854.0b6a211c8e75.I5724d32bb2dae440888efbc47334d8c115db9d50@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 +++++---- net/mac80211/mlme.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 490ee6f52d6e..de8f76a7637b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3618,7 +3618,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) return ieee80211_link_use_reserved_context(&sdata->deflink); } - if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, + if (!cfg80211_chandef_identical(&link_data->conf->chandef, &link_data->csa_chandef)) return -EINVAL; @@ -3634,7 +3634,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) changed |= BSS_CHANGED_EHT_PUNCTURING; } - ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); + ieee80211_link_info_change_notify(sdata, link_data, changed); if (link_data->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, @@ -3646,8 +3646,9 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef, 0, - sdata->vif.bss_conf.eht_puncturing); + cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef, + link_data->link_id, + link_data->conf->eht_puncturing); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f7dd25dc766c..f1d88393689c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1772,7 +1772,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) return; } - cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0, 0); + cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, + link->link_id, 0); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, -- cgit v1.2.3 From 762c8dc7f269b748babe32dd19d2084ce1b3f31f Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 11 Sep 2023 20:50:45 +0800 Subject: net: dst: remove unnecessary input parameter in 
dst_alloc and dst_init Since commit 1202cdd66531("Remove DECnet support from kernel") has been merged, all callers pass in the initial_ref value of 1 when they call dst_alloc(). Therefore, remove initial_ref when the dst_alloc() is declared and replace initial_ref with 1 in dst_alloc(). Also when all callers call dst_init(), the value of initial_ref is 1. Therefore, remove the input parameter initial_ref of the dst_init() and replace initial_ref with the value 1 in dst_init. Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20230911125045.346390-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- include/net/dst.h | 4 ++-- net/core/dst.c | 10 +++++----- net/ipv4/route.c | 6 +++--- net/ipv6/route.c | 4 ++-- net/openvswitch/actions.c | 4 ++-- net/sched/sch_frag.c | 4 ++-- net/xfrm/xfrm_policy.c | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index 78884429deed..f8b8599a0600 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -392,10 +392,10 @@ static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags); struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); diff --git a/net/core/dst.c b/net/core/dst.c index 980e2fd2f013..6838d3212c37 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = { EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; @@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - rcuref_init(&dst->__rcuref, initial_ref); + rcuref_init(&dst->__rcuref, 1); INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; @@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) + int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (!dst) return NULL; - dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } @@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66f419e7f9a7..fb3045692b99 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1630,7 +1630,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, { struct rtable *rt; - rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt = 
dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? DST_NOXFRM : 0)); if (rt) { @@ -1658,7 +1658,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; - new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { @@ -2832,7 +2832,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c687b357e6a..9d8dfc7423e4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 1, DST_OBSOLETE_FORCE_CHK, flags); + DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); @@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fd66014d8a76..5f8094acd056 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -873,7 +873,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; @@ -890,7 +890,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index a9bd0a235890..ce63414185fd 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, unsigned long orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; @@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6b405782b63..c4c4fc29ccf5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2561,7 +2561,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); -- 
cgit v1.2.3 From b49d252216e4f9e3030865b79d5ca16f050e4a19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Sep 2023 17:05:28 +0000 Subject: tcp: no longer release socket ownership in tcp_release_cb() This partially reverts c3f9b01849ef ("tcp: tcp_release_cb() should release socket ownership"). prequeue has been removed by Florian in commit e7942d0633c4 ("tcp: remove prequeue support") __tcp_checksum_complete_user() being gone, we no longer have to release socket ownership in tcp_release_cb(). This is a prereq for third patch in the series ("net: call prot->release_cb() when processing backlog"). Signed-off-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/sock.c | 3 --- net/ipv4/tcp_output.c | 10 ---------- 2 files changed, 13 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 16584e2dd648..21610e3845a5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3519,9 +3519,6 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); - /* Warning : release_cb() might need to release sk ownership, - * ie call sock_release_ownership(sk) before us. - */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccfc8bbf7455..b4cac12d0e63 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1101,16 +1101,6 @@ void tcp_release_cb(struct sock *sk) tcp_tsq_write(sk); __sock_put(sk); } - /* Here begins the tricky part : - * We are called from release_sock() with : - * 1) BH disabled - * 2) sk_lock.slock spinlock held - * 3) socket owned by us (sk->sk_lock.owned == 1) - * - * But following code is meant to be called from BH handlers, - * so we should keep BH disabled, but early release socket ownership - */ - sock_release_ownership(sk); if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); -- cgit v1.2.3 From 4505dc2a522826975167823f64f0896bac1323fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Sep 2023 17:05:30 +0000 Subject: net: call prot->release_cb() when processing backlog __sk_flush_backlog() / sk_flush_backlog() are used when TCP recvmsg()/sendmsg() process large chunks, to not let packets in the backlog too long. It makes sense to call tcp_release_cb() to also process actions held in sk->sk_tsq_flags for smoother scheduling. Signed-off-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/sock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 21610e3845a5..bb89b88bc1e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3001,6 +3001,9 @@ void __sk_flush_backlog(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); __release_sock(sk); + + if (sk->sk_prot->release_cb) + sk->sk_prot->release_cb(sk); spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); -- cgit v1.2.3 From 133c4c0d37175f510a10fa9bed51e223936073fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Sep 2023 17:05:31 +0000 Subject: tcp: defer regular ACK while processing socket backlog This idea came after a particular workload requested the quickack attribute set on routes, and a performance drop was noticed for large bulk transfers. For high throughput flows, it is best to use one cpu running the user thread issuing socket system calls, and a separate cpu to process incoming packets from BH context. 
(With TSO/GRO, bottleneck is usually the 'user' cpu) Problem is the user thread can spend a lot of time while holding the socket lock, forcing BH handler to queue most of incoming packets in the socket backlog. Whenever the user thread releases the socket lock, it must first process all accumulated packets in the backlog, potentially adding latency spikes. Due to flood mitigation, having too many packets in the backlog increases chance of unexpected drops. Backlog processing unfortunately shifts a fair amount of cpu cycles from the BH cpu to the 'user' cpu, thus reducing max throughput. This patch takes advantage of the backlog processing, and the fact that ACK are mostly cumulative. The idea is to detect we are in the backlog processing and defer all eligible ACK into a single one, sent from tcp_release_cb(). This saves cpu cycles on both sides, and network resources. Performance of a single TCP flow on a 200Gbit NIC: - Throughput is increased by 20% (100Gbit -> 120Gbit). - Number of generated ACK per second shrinks from 240,000 to 40,000. - Number of backlog drops per second shrinks from 230 to 0. Benchmark context: - Regular netperf TCP_STREAM (no zerocopy) - Intel(R) Xeon(R) Platinum 8481C (Saphire Rapids) - MAX_SKB_FRAGS = 17 (~60KB per GRO packet) This feature is guarded by a new sysctl, and enabled by default: /proc/sys/net/ipv4/tcp_backlog_ack_defer Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Dave Taht Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 7 +++++++ include/linux/tcp.h | 14 ++++++++------ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ net/ipv4/tcp_input.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 5 ++++- 7 files changed, 38 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a66054d0763a..5bfa1837968c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER Default : 44 +tcp_backlog_ack_defer - BOOLEAN + If set, user thread processing socket backlog tries sending + one ACK for the whole queue. This helps to avoid potential + long latencies at end of a TCP socket syscall. + + Default : true + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. 
An idle period is defined at diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..44d946161d4a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,15 +463,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..d96d05b08819 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; + u8 sysctl_tcp_backlog_ack_defer; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ac890b4073f..e7f024d93572 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1366,6 +1366,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, }, + { + .procname = "tcp_backlog_ack_defer", + .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06fe1cf645d5..41b471748437 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5553,6 +5553,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { + /* If we are running from __release_sock() in user context, + * Defer the ack until tcp_release_cb(). 
+ */ + if (sock_owned_by_user_nocheck(sk) && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); + return; + } send_now: tcp_send_ack(sk); return; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27140e5cdc06..f13eb7e23d03 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3263,6 +3263,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4cac12d0e63..1fc1f879cfd6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1077,7 +1077,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1114,6 +1115,8 @@ void tcp_release_cb(struct sock *sk) inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); -- cgit v1.2.3 From 22446b7ee2bb44fe7a61d8eda6d83bdc726bbbd9 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 12 Sep 2023 06:55:16 +0300 Subject: wifi: wext: avoid extra calls to strlen() in ieee80211_bss() Since 'sprintf()' returns the number of characters emitted, an extra calls to 'strlen()' in 'ieee80211_bss()' may be dropped. 
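The pattern behind this cleanup, shown in isolation: sprintf() returns the number of characters it wrote (excluding the terminating NUL), so keeping that return value makes the follow-up strlen() on the same buffer redundant. A tiny stand-alone illustration, not the wext code itself:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[64];
	int len;

	/* before: format the string, then measure it again */
	sprintf(buf, "Capabilities: 0x%02X", 0x2a);
	len = (int)strlen(buf);
	printf("strlen() says %d\n", len);

	/* after: the return value of sprintf() already is the length */
	len = sprintf(buf, "Capabilities: 0x%02X", 0x2a);
	printf("sprintf() returned %d\n", len);

	return 0;
}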
Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20230912035522.15947-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/wireless/scan.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 19516073c6d5..ae4d000009fe 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3422,59 +3422,63 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, cfg = (u8 *)ie + 2; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh Network Path Selection Protocol ID: " - "0x%02X", cfg[0]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Mesh Network Path Selection Protocol ID: 0x%02X", + cfg[0]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Path Selection Metric ID: 0x%02X", - cfg[1]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Path Selection Metric ID: 0x%02X", + cfg[1]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Congestion Control Mode ID: 0x%02X", - cfg[2]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Congestion Control Mode ID: 0x%02X", + cfg[2]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Synchronization ID: 0x%02X", cfg[3]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Synchronization ID: 0x%02X", + cfg[3]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Authentication ID: 0x%02X", cfg[4]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Authentication ID: 0x%02X", + cfg[4]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Formation Info: 0x%02X", cfg[5]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Formation Info: 0x%02X", + cfg[5]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Capabilities: 0x%02X", cfg[6]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Capabilities: 0x%02X", + cfg[6]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, @@ -3530,17 +3534,16 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, "tsf=%016llx", + (unsigned long long)(ies->tsf)); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %ums ago", - elapsed_jiffies_msecs(bss->ts)); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, " Last beacon: %ums ago", + elapsed_jiffies_msecs(bss->ts)); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) -- cgit v1.2.3 From 5add321c329b1746589b51359259666ca3dbe219 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Aug 2023 12:17:43 +0200 Subject: wifi: cfg80211: remove 
scan_width support There really isn't any support for scanning at different channel widths than 20 MHz since there's no way to set it. Remove this support for now, if somebody wants to maintain this whole thing later we can revisit how it should work. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/wil6210/wmi.c | 2 - .../broadcom/brcm80211/brcmfmac/cfg80211.c | 1 - include/net/cfg80211.h | 63 +------------------ include/uapi/linux/nl80211.h | 2 +- net/mac80211/ibss.c | 30 ++------- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/ocb.c | 5 +- net/mac80211/scan.c | 71 +++++----------------- net/wireless/mesh.c | 5 +- net/wireless/nl80211.c | 1 - net/wireless/scan.c | 23 +------ net/wireless/trace.h | 6 +- net/wireless/util.c | 14 ++--- 13 files changed, 35 insertions(+), 191 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 6a5976a2944c..6fdb77d4c59e 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -870,7 +870,6 @@ static void wmi_evt_rx_mgmt(struct wil6210_vif *vif, int id, void *d, int len) struct cfg80211_bss *bss; struct cfg80211_inform_bss bss_data = { .chan = channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, .boottime_ns = ktime_to_ns(ktime_get_boottime()), }; @@ -1389,7 +1388,6 @@ wmi_evt_sched_scan_result(struct wil6210_vif *vif, int id, void *d, int len) u32 d_len; struct cfg80211_bss *bss; struct cfg80211_inform_bss bss_data = { - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .boottime_ns = ktime_to_ns(ktime_get_boottime()), }; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 2a90bb24ba77..94b4a7b8793d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3367,7 +3367,6 @@ static s32 brcmf_inform_single_bss(struct brcmf_cfg80211_info *cfg, freq = ieee80211_channel_to_frequency(channel, band); bss_data.chan = ieee80211_get_channel(wiphy, freq); - bss_data.scan_width = NL80211_BSS_CHAN_WIDTH_20; bss_data.boottime_ns = ktime_to_ns(ktime_get_boottime()); notify_capability = le16_to_cpu(bi->capability); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 134d9e0b73c9..2d3fa4a29781 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2536,7 +2536,6 @@ struct cfg80211_scan_6ghz_params { * @n_ssids: number of SSIDs * @channels: channels to scan on. * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @duration: how long to listen on each channel, in TUs. 
If @@ -2566,7 +2565,6 @@ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u16 duration; @@ -2661,7 +2659,6 @@ struct cfg80211_bss_select_adjust { * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @flags: control flags from &enum nl80211_scan_flags @@ -2709,7 +2706,6 @@ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u32 flags; @@ -2757,7 +2753,6 @@ enum cfg80211_signal_type { /** * struct cfg80211_inform_bss - BSS inform data * @chan: channel the frame was received on - * @scan_width: scan width that was used * @signal: signal strength value, according to the wiphy's * signal type * @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was @@ -2777,7 +2772,6 @@ enum cfg80211_signal_type { */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; - enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; u64 parent_tsf; @@ -2811,7 +2805,6 @@ struct cfg80211_bss_ies { * for use in scan results and similar. * * @channel: channel this BSS is on - * @scan_width: width of the control channel * @bssid: BSSID of the BSS * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order @@ -2841,7 +2834,6 @@ struct cfg80211_bss_ies { */ struct cfg80211_bss { struct ieee80211_channel *channel; - enum nl80211_bss_scan_width scan_width; const struct cfg80211_bss_ies __rcu *ies; const struct cfg80211_bss_ies __rcu *beacon_ies; @@ -6321,13 +6313,11 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, /** * ieee80211_mandatory_rates - get mandatory rates for a given band * @sband: the band to look for rates in - * @scan_width: width of the control channel * * This function returns a bitmap of the mandatory rates for the given * band, bits are set according to the rate position in the bitrates array. 
*/ -u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, - enum nl80211_bss_scan_width scan_width); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband); /* * Radiotap parsing functions -- for controlled injection support @@ -6988,22 +6978,6 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp); -static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width_frame(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - struct ieee80211_mgmt *mgmt, size_t len, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp); -} - static inline struct cfg80211_bss * __must_check cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, @@ -7012,7 +6986,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7114,26 +7087,6 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, u16 beacon_interval, const u8 *ie, size_t ielen, gfp_t gfp); -static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf, - capability, beacon_interval, ie, ielen, - gfp); -} - static inline struct cfg80211_bss * __must_check cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, @@ -7144,7 +7097,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7229,19 +7181,6 @@ void cfg80211_bss_iter(struct wiphy *wiphy, void *data), void *iter_data); -static inline enum nl80211_bss_scan_width -cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef) -{ - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - return NL80211_BSS_CHAN_WIDTH_5; - case NL80211_CHAN_WIDTH_10: - return NL80211_BSS_CHAN_WIDTH_10; - default: - return NL80211_BSS_CHAN_WIDTH_20; - } -} - /** * cfg80211_rx_mlme_mgmt - notification of processed MLME management frame * @dev: network device diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 88eb85c63029..b628126e06fa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5038,7 +5038,7 @@ enum nl80211_bss_scan_width { * elements from a Beacon frame (bin); not present if no Beacon frame has * yet been received * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel - * (u32, enum nl80211_bss_scan_width) + * (u32, enum nl80211_bss_scan_width) - No longer used! 
* @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64) * (not present if no beacon frame has been received yet) * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 9907cea6457c..55ec34602b53 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -377,7 +377,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); bss_meta.chan = chan; - bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef); bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, presp->head_len, GFP_KERNEL); @@ -595,7 +594,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; int band; /* @@ -624,7 +622,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, if (WARN_ON_ONCE(!chanctx_conf)) return NULL; band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_KERNEL); @@ -636,7 +633,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta); } @@ -975,7 +972,6 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, { struct sta_info *sta; enum nl80211_band band = rx_status->band; - enum nl80211_bss_scan_width scan_width; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; bool rates_updated = false; @@ -1001,15 +997,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, u32 prev_rates; prev_rates = sta->sta.deflink.supp_rates[band]; - /* make sure mandatory rates are always added */ - scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->bw == RATE_INFO_BW_5) - scan_width = NL80211_BSS_CHAN_WIDTH_5; - else if (rx_status->bw == RATE_INFO_BW_10) - scan_width = NL80211_BSS_CHAN_WIDTH_10; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); if (sta->sta.deflink.supp_rates[band] != prev_rates) { ibss_dbg(sdata, "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", @@ -1196,7 +1186,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; int band; /* @@ -1222,7 +1211,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, return; } band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -1232,7 +1220,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); 
@@ -1282,7 +1270,6 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - enum nl80211_bss_scan_width scan_width; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -1304,9 +1291,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); - scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, - NULL, 0, scan_width); + NULL, 0); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -1424,7 +1410,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct cfg80211_bss *cbss; struct ieee80211_channel *chan = NULL; const u8 *bssid = NULL; - enum nl80211_bss_scan_width scan_width; int active_ibss; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -1483,8 +1468,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); - if (ifibss->fixed_channel) { num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, &ifibss->chandef, @@ -1492,11 +1475,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) ARRAY_SIZE(channels)); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, channels, - num, scan_width); + num); } else { ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, NULL, - 0, scan_width); + ifibss->ssid_len, NULL, 0); } } else { int interval = IEEE80211_SCAN_INTERVAL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e7dc4cdcdcde..e443a8e5e9be 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1911,8 +1911,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, - unsigned int n_channels, - enum nl80211_bss_scan_width scan_width); + unsigned int n_channels); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 6e2965ffb809..449af4e1cca4 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -44,7 +44,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; struct sta_info *sta; int band; @@ -66,7 +65,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, return; } band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -75,8 +73,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; - sta->sta.deflink.supp_rates[band] = - ieee80211_mandatory_rates(sband, scan_width); + sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband); spin_lock(&ifocb->incomplete_lock); list_add(&sta->list, &ifocb->incomplete_stations); 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 58d525e41f6b..24fa06105378 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -187,12 +187,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->bw == RATE_INFO_BW_5) - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5; - else if (rx_status->bw == RATE_INFO_BW_10) - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10; - bss_meta.chan = channel; rcu_read_lock(); @@ -315,22 +309,11 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) ieee80211_rx_bss_put(local, bss); } -static void -ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef, - enum nl80211_bss_scan_width scan_width) +static void ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef) { memset(chandef, 0, sizeof(*chandef)); - switch (scan_width) { - case NL80211_BSS_CHAN_WIDTH_5: - chandef->width = NL80211_CHAN_WIDTH_5; - break; - case NL80211_BSS_CHAN_WIDTH_10: - chandef->width = NL80211_CHAN_WIDTH_10; - break; - default: - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - break; - } + + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; } /* return false if no more work */ @@ -378,7 +361,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) } local->hw_scan_req->req.n_channels = n_chans; - ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + ieee80211_prepare_scan_chandef(&chandef); if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; @@ -919,7 +902,6 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, { int skip; struct ieee80211_channel *chan; - enum nl80211_bss_scan_width oper_scan_width; struct cfg80211_scan_request *scan_req; scan_req = rcu_dereference_protected(local->scan_req, @@ -933,42 +915,21 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, local->scan_chandef.freq1_offset = chan->freq_offset; local->scan_chandef.center_freq2 = 0; - /* For scanning on the S1G band, ignore scan_width (which is constant - * across all channels) for now since channel width is specific to each - * channel. Detect the required channel width here and likely revisit - * later. Maybe scan_width could be used to build the channel scan list? + /* For scanning on the S1G band, detect the channel width according to + * the channel being scanned. */ if (chan->band == NL80211_BAND_S1GHZ) { local->scan_chandef.width = ieee80211_s1g_channel_width(chan); goto set_channel; } - switch (scan_req->scan_width) { - case NL80211_BSS_CHAN_WIDTH_5: - local->scan_chandef.width = NL80211_CHAN_WIDTH_5; - break; - case NL80211_BSS_CHAN_WIDTH_10: - local->scan_chandef.width = NL80211_CHAN_WIDTH_10; - break; - default: - case NL80211_BSS_CHAN_WIDTH_20: - /* If scanning on oper channel, use whatever channel-type - * is currently in use. - */ - oper_scan_width = cfg80211_chandef_to_scan_width( - &local->_oper_chandef); - if (chan == local->_oper_chandef.chan && - oper_scan_width == scan_req->scan_width) - local->scan_chandef = local->_oper_chandef; - else - local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - break; - case NL80211_BSS_CHAN_WIDTH_1: - case NL80211_BSS_CHAN_WIDTH_2: - /* shouldn't get here, S1G handled above */ - WARN_ON(1); - break; - } + /* If scanning on oper channel, use whatever channel-type + * is currently in use. 
+ */ + if (chan == local->_oper_chandef.chan) + local->scan_chandef = local->_oper_chandef; + else + local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; set_channel: if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) @@ -1152,8 +1113,7 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, - unsigned int n_channels, - enum nl80211_bss_scan_width scan_width) + unsigned int n_channels) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY, i, n_ch = 0; @@ -1210,7 +1170,6 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, local->int_scan_req->ssids = &local->scan_ssid; local->int_scan_req->n_ssids = 1; - local->int_scan_req->scan_width = scan_width; memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; @@ -1311,7 +1270,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, goto out; } - ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + ieee80211_prepare_scan_chandef(&chandef); ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, &sched_scan_ies, req->ie, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index dc75abdb8f2e..83306979fbe2 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -172,7 +172,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, * basic rates */ if (!setup->basic_rates) { - enum nl80211_bss_scan_width scan_width; struct ieee80211_supported_band *sband = rdev->wiphy.bands[setup->chandef.chan->band]; @@ -193,9 +192,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, } } } else { - scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); - setup->basic_rates = ieee80211_mandatory_rates(sband, - scan_width); + setup->basic_rates = ieee80211_mandatory_rates(sband); } } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab0aea7dca7d..f2dd4c85a10f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10283,7 +10283,6 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET, res->channel->freq_offset) || - nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) || nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, jiffies_to_msecs(jiffies - intbss->ts))) goto nla_put_failure; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ae4d000009fe..a5758edf53b8 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1638,8 +1638,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, continue; if (bss->pub.channel != new->pub.channel) continue; - if (bss->pub.scan_width != new->pub.scan_width) - continue; if (rcu_access_pointer(bss->pub.beacon_ies)) continue; ies = rcu_access_pointer(bss->pub.ies); @@ -1936,8 +1934,7 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number); */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width) + struct ieee80211_channel *channel) { u32 freq; int channel_number; @@ -1977,16 +1974,6 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } - if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || - scan_width == NL80211_BSS_CHAN_WIDTH_5) { - 
/* - * Ignore channel number in 5 and 10 MHz channels where there - * may not be an n:1 or 1:n mapping between frequencies and - * channel numbers. - */ - return channel; - } - /* * Use the channel determined through the payload channel number * instead of the RX channel reported by the driver. @@ -2046,14 +2033,12 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, channel = data->channel; if (!channel) channel = cfg80211_get_bss_channel(wiphy, data->ie, data->ielen, - drv_data->chan, - drv_data->scan_width); + drv_data->chan); if (!channel) return NULL; memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN); tmp.pub.channel = channel; - tmp.pub.scan_width = drv_data->scan_width; if (data->bss_source != BSS_SOURCE_STA_PROFILE) tmp.pub.signal = drv_data->signal; else @@ -2814,8 +2799,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width); + channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan); if (!channel) return NULL; @@ -2868,7 +2852,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, tmp.pub.beacon_interval = beacon_int; tmp.pub.capability = capability; tmp.pub.channel = channel; - tmp.pub.scan_width = data->scan_width; tmp.pub.signal = data->signal; tmp.ts_boottime = data->boottime_ns; tmp.parent_tsf = data->parent_tsf; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 617c0d0dfa96..126c3a03e43e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3590,7 +3590,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame, TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY - __field(enum nl80211_bss_scan_width, scan_width) __dynamic_array(u8, mgmt, len) __field(s32, signal) __field(u64, ts_boottime) @@ -3600,7 +3599,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame, TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(data->chan); - __entry->scan_width = data->scan_width; if (mgmt) memcpy(__get_dynamic_array(mgmt), mgmt, len); __entry->signal = data->signal; @@ -3609,8 +3607,8 @@ TRACE_EVENT(cfg80211_inform_bss_frame, MAC_ASSIGN(parent_bssid, data->parent_bssid); ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT - "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", - WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width, + "signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", + WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal, (unsigned long long)__entry->ts_boottime, (unsigned long long)__entry->parent_tsf, __entry->parent_bssid) diff --git a/net/wireless/util.c b/net/wireless/util.c index 56cbd9979a3f..213c9405e645 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -43,8 +43,7 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); -u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, - enum nl80211_bss_scan_width scan_width) +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) { struct ieee80211_rate *bitrates; u32 mandatory_rates = 0; @@ -54,15 +53,10 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, if (WARN_ON(!sband)) return 1; - if (sband->band == NL80211_BAND_2GHZ) { - if (scan_width == NL80211_BSS_CHAN_WIDTH_5 || - scan_width == NL80211_BSS_CHAN_WIDTH_10) - mandatory_flag = IEEE80211_RATE_MANDATORY_G; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - } else { + if (sband->band == NL80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else mandatory_flag = IEEE80211_RATE_MANDATORY_A; 
- } bitrates = sband->bitrates; for (i = 0; i < sband->n_bitrates; i++) -- cgit v1.2.3 From 2400dfe23fa91612c18c6c8d8a5b8164ff98836c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Aug 2023 12:17:44 +0200 Subject: wifi: mac80211: remove shifted rate support We really cannot even get into this as we can't have a BSS with a 5/10 MHz (scan) width, and therefore all the code handling shifted rates cannot happen. Remove it all, since it's broken anyway, at least with MLO. Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 8 ++-- net/mac80211/cfg.c | 8 +--- net/mac80211/ibss.c | 13 ++----- net/mac80211/ieee80211_i.h | 39 +------------------ net/mac80211/mlme.c | 18 +++------ net/mac80211/rc80211_minstrel_ht.c | 7 ++-- net/mac80211/status.c | 12 +++--- net/mac80211/tx.c | 20 ++++------ net/mac80211/util.c | 79 +++++++++++--------------------------- 9 files changed, 53 insertions(+), 151 deletions(-) (limited to 'net') diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index e8ebd343e2bf..14be7b526f1d 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -632,7 +632,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, { struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *conf; - int rateidx, shift = 0; + int rateidx; bool cck, short_pream; u32 basic_rates; u8 band = 0; @@ -641,10 +641,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, len += 38; /* Ethernet header length */ conf = rcu_dereference(vif->bss_conf.chanctx_conf); - if (conf) { + if (conf) band = conf->def.chan->band; - shift = ieee80211_chandef_get_shift(&conf->def); - } if (pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, @@ -704,7 +702,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, short_pream = vif->bss_conf.use_short_preamble; rateidx = basic_rates ? 
ffs(basic_rates) - 1 : 0; - rate = sband->bitrates[rateidx].bitrate << shift; + rate = sband->bitrates[rateidx].bitrate; cck = sband->bitrates[rateidx].flags & IEEE80211_RATE_MANDATORY_B; return ieee80211_calc_legacy_rate_duration(rate, short_pream, cck, len); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index de8f76a7637b..4f30e80192e7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -810,15 +810,11 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); - u16 brate; sband = ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); - if (sband && sband->bitrates) { - brate = sband->bitrates[rate->idx].bitrate; - rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); - } + if (sband && sband->bitrates) + rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 55ec34602b53..a7736acadf3c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -51,7 +51,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, u32 rate_flags, rates = 0, rates_added = 0; struct beacon_data *presp; int frame_len; - int shift; /* Build IBSS probe response */ frame_len = sizeof(struct ieee80211_hdr_3addr) + @@ -92,7 +91,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[chandef->chan->band]; rate_flags = ieee80211_chandef_rate_flags(chandef); - shift = ieee80211_chandef_get_shift(chandef); rates_n = 0; if (have_higher_than_11mbit) *have_higher_than_11mbit = false; @@ -111,8 +109,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_SUPP_RATES; *pos++ = min_t(int, 8, rates_n); for (ri = 0; ri < sband->n_bitrates; ri++) { - int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; @@ -155,8 +152,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = rates_n - 8; for (; ri < sband->n_bitrates; ri++) { - int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; @@ -399,7 +395,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, enum nl80211_channel_type chan_type; u64 tsf; u32 rate_flags; - int shift; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -434,7 +429,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); basic_rates = 0; @@ -448,8 +442,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, != rate_flags) continue; - brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, - 5 * (1 << shift)); + brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, 5); if (brate == rate) { if (is_basic) basic_rates |= BIT(j); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e443a8e5e9be..d5c5f865323c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1141,40 +1141,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) #define 
sdata_dereference(p, sdata) \ wiphy_dereference(sdata->local->hw.wiphy, p) -static inline int -ieee80211_chanwidth_get_shift(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return 2; - case NL80211_CHAN_WIDTH_10: - return 1; - default: - return 0; - } -} - -static inline int -ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef) -{ - return ieee80211_chanwidth_get_shift(chandef->width); -} - -static inline int -ieee80211_vif_get_shift(struct ieee80211_vif *vif) -{ - struct ieee80211_chanctx_conf *chanctx_conf; - int shift = 0; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); - if (chanctx_conf) - shift = ieee80211_chandef_get_shift(&chanctx_conf->def); - rcu_read_unlock(); - - return shift; -} - static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, struct cfg80211_rnr_elems *rnr_elems, @@ -2041,7 +2007,7 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, int shift, bool send_to_cooked, + int retry_count, bool send_to_cooked, struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); @@ -2214,8 +2180,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ int ieee80211_frame_duration(enum nl80211_band band, size_t len, - int rate, int erp, int short_preamble, - int shift); + int rate, int erp, int short_preamble); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f1d88393689c..e8f16ed235c3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -829,7 +829,6 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, struct ieee80211_supported_band *sband, struct ieee80211_mgd_assoc_data *assoc_data) { - unsigned int shift = ieee80211_chanwidth_get_shift(width); unsigned int rates_len, supp_rates_len; u32 rates = 0; int i, count; @@ -868,8 +867,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, count = 0; for (i = 0; i < sband->n_bitrates; i++) { if (BIT(i) & rates) { - int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = (u8)rate; if (++count == 8) break; @@ -885,8 +883,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, if (BIT(i) & rates) { int rate; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = (u8)rate; } } @@ -3857,8 +3854,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, u8 *supp_rates, unsigned int supp_rates_len, u32 *rates, u32 *basic_rates, bool *have_higher_than_11mbit, - int *min_rate, int *min_rate_index, - int shift) + int *min_rate, int *min_rate_index) { int i, j; @@ -3866,7 +3862,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, int rate = supp_rates[i] & 0x7f; bool is_basic = !!(supp_rates[i] & 0x80); - if ((rate * 5 * (1 << shift)) > 110) + if ((rate * 5) > 110) *have_higher_than_11mbit = true; /* @@ -3890,7 +3886,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, br = &sband->bitrates[j]; - brate = DIV_ROUND_UP(br->bitrate, 
(1 << shift) * 5); + brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { *rates |= BIT(j); if (is_basic) @@ -4334,8 +4330,6 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link, u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; - /* this is clearly wrong for MLO but we'll just remove it later */ - int shift = ieee80211_vif_get_shift(&sdata->vif); struct ieee80211_supported_band *sband; memcpy(link_sta->addr, cbss->bssid, ETH_ALEN); @@ -4351,7 +4345,7 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link, ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len, &rates, &basic_rates, &have_higher_than_11mbit, - &min_rate, &min_rate_index, shift); + &min_rate, &min_rate_index); /* * This used to be a workaround for basic rates missing diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index b34c80522047..6bf3b4444a43 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1725,16 +1725,15 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->band = sband->band; mi->last_stats_update = jiffies; - ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0); - mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0); + ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1); mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; ack_dur = ieee80211_frame_duration(sband->band, 10, - ctl_rate->bitrate, erp, 1, - ieee80211_chandef_get_shift(chandef)); + ctl_rate->bitrate, erp, 1); mi->overhead_legacy = ack_dur; mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 3355e66d96d8..f67eafada741 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -256,7 +256,7 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, static void ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, struct sk_buff *skb, int retry_count, - int rtap_len, int shift, + int rtap_len, struct ieee80211_tx_status *status) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -307,7 +307,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, if (legacy_rate) { rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE)); - *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift)); + *pos = DIV_ROUND_UP(legacy_rate, 5); /* padding for tx flags */ pos += 2; } @@ -878,7 +878,7 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, } void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, int shift, bool send_to_cooked, + int retry_count, bool send_to_cooked, struct ieee80211_tx_status *status) { struct sk_buff *skb2; @@ -895,7 +895,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, return; } ieee80211_add_tx_radiotap_header(local, skb, retry_count, - rtap_len, shift, status); + rtap_len, status); /* XXX: is this sufficient for BPF? 
*/ skb_reset_mac_header(skb); @@ -948,14 +948,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, bool acked; bool noack_success; struct ieee80211_bar *bar; - int shift = 0; int tid = IEEE80211_NUM_TIDS; fc = hdr->frame_control; if (status->sta) { sta = container_of(status->sta, struct sta_info, sta); - shift = ieee80211_vif_get_shift(&sta->sdata->vif); if (info->flags & IEEE80211_TX_STATUS_EOSP) clear_sta_flag(sta, WLAN_STA_SP); @@ -1093,7 +1091,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } /* send to monitor interfaces */ - ieee80211_tx_monitor(local, skb, retry_count, shift, + ieee80211_tx_monitor(local, skb, retry_count, send_to_cooked, status); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d30b9f204d1b..9b845fbf923c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -43,7 +43,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct sk_buff *skb, int group_addr, int next_frag_len) { - int rate, mrate, erp, dur, i, shift = 0; + int rate, mrate, erp, dur, i; struct ieee80211_rate *txrate; struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; @@ -58,10 +58,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rcu_read_lock(); chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf); - if (chanctx_conf) { - shift = ieee80211_chandef_get_shift(&chanctx_conf->def); + if (chanctx_conf) rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); - } rcu_read_unlock(); /* uh huh? */ @@ -143,7 +141,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, continue; if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) - rate = DIV_ROUND_UP(r->bitrate, 1 << shift); + rate = r->bitrate; switch (sband->band) { case NL80211_BAND_2GHZ: @@ -173,7 +171,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (rate == -1) { /* No matching basic rate found; use highest suitable mandatory * PHY rate */ - rate = DIV_ROUND_UP(mrate, 1 << shift); + rate = mrate; } /* Don't calculate ACKs for QoS Frames with NoAck Policy set */ @@ -185,8 +183,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up * to closest integer */ dur = ieee80211_frame_duration(sband->band, 10, rate, erp, - tx->sdata->vif.bss_conf.use_short_preamble, - shift); + tx->sdata->vif.bss_conf.use_short_preamble); if (next_frag_len) { /* Frame is fragmented: duration increases with time needed to @@ -195,8 +192,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, /* next fragment */ dur += ieee80211_frame_duration(sband->band, next_frag_len, txrate->bitrate, erp, - tx->sdata->vif.bss_conf.use_short_preamble, - shift); + tx->sdata->vif.bss_conf.use_short_preamble); } return cpu_to_le16(dur); @@ -5556,7 +5552,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, IEEE80211_INCLUDE_ALL_MBSSID_ELEMS, NULL); struct sk_buff *copy; - int shift; if (!bcn) return bcn; @@ -5576,8 +5571,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!copy) return bcn; - shift = ieee80211_vif_get_shift(vif); - ieee80211_tx_monitor(hw_to_local(hw), copy, 1, shift, false, NULL); + ieee80211_tx_monitor(hw_to_local(hw), copy, 1, false, NULL); return bcn; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ed113028794a..88f714a75862 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -110,8 +110,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) } int 
ieee80211_frame_duration(enum nl80211_band band, size_t len, - int rate, int erp, int short_preamble, - int shift) + int rate, int erp, int short_preamble) { int dur; @@ -122,9 +121,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, * * rate is in 100 kbps, so divident is multiplied by 10 in the * DIV_ROUND_UP() operations. - * - * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and - * is assumed to be 0 otherwise. */ if (band == NL80211_BAND_5GHZ || erp) { @@ -145,12 +141,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */ dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */ - /* IEEE 802.11-2012 18.3.2.4: all values above are: - * * times 4 for 5 MHz - * * times 2 for 10 MHz - */ - dur *= 1 << shift; - /* rates should already consider the channel bandwidth, * don't apply divisor again. */ @@ -185,7 +175,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, { struct ieee80211_sub_if_data *sdata; u16 dur; - int erp, shift = 0; + int erp; bool short_preamble = false; erp = 0; @@ -194,11 +184,10 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); } dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp, - short_preamble, shift); + short_preamble); return cpu_to_le16(dur); } @@ -212,7 +201,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate; struct ieee80211_sub_if_data *sdata; bool short_preamble; - int erp, shift = 0, bitrate; + int erp, bitrate; u16 dur; struct ieee80211_supported_band *sband; @@ -228,20 +217,19 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); } - bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift); + bitrate = rate->bitrate; /* CTS duration */ dur = ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); /* Data frame duration */ dur += ieee80211_frame_duration(sband->band, frame_len, bitrate, - erp, short_preamble, shift); + erp, short_preamble); /* ACK duration */ dur += ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); return cpu_to_le16(dur); } @@ -256,7 +244,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate; struct ieee80211_sub_if_data *sdata; bool short_preamble; - int erp, shift = 0, bitrate; + int erp, bitrate; u16 dur; struct ieee80211_supported_band *sband; @@ -271,18 +259,17 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); } - bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift); + bitrate = rate->bitrate; /* Data frame duration */ dur = ieee80211_frame_duration(sband->band, frame_len, bitrate, - erp, short_preamble, shift); + erp, short_preamble); if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { /* ACK duration */ dur += ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); } return cpu_to_le16(dur); @@ -1944,7 
+1931,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, u8 rates[32]; int num_rates; int ext_rates_len; - int shift; u32 rate_flags; bool have_80mhz = false; @@ -1955,7 +1941,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, return 0; rate_flags = ieee80211_chandef_rate_flags(chandef); - shift = ieee80211_chandef_get_shift(chandef); /* For direct scan add S1G IE and consider its override bits */ if (band == NL80211_BAND_S1GHZ) { @@ -1973,8 +1958,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, continue; rates[num_rates++] = - (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, - (1 << shift) * 5); + (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); } supp_rates_len = min_t(int, num_rates, 8); @@ -2267,14 +2251,13 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; size_t num_rates; u32 supp_rates, rate_flags; - int i, j, shift; + int i, j; sband = sdata->local->hw.wiphy->bands[band]; if (WARN_ON(!sband)) return 1; rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); num_rates = sband->n_bitrates; supp_rates = 0; @@ -2300,8 +2283,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, != rate_flags) continue; - brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, - 1 << shift); + brate = sband->bitrates[j].bitrate; if (brate == own_rate) { supp_rates |= BIT(j); @@ -3998,7 +3980,6 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, const u8 *srates, int srates_len, u32 *rates) { u32 rate_flags = ieee80211_chanwidth_rate_flags(width); - int shift = ieee80211_chanwidth_get_shift(width); struct ieee80211_rate *br; int brate, rate, i, j, count = 0; @@ -4012,7 +3993,7 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, if ((rate_flags & br->flags) != rate_flags) continue; - brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5); + brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { *rates |= BIT(j); count++; @@ -4029,12 +4010,11 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, shift; + int rate; u8 i, rates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; - shift = ieee80211_vif_get_shift(&sdata->vif); rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); sband = local->hw.wiphy->bands[band]; rates = 0; @@ -4059,8 +4039,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = basic | (u8) rate; } @@ -4073,13 +4052,12 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, shift; + int rate; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); sband = local->hw.wiphy->bands[band]; exrates = 0; @@ -4108,8 +4086,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = 
DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = basic | (u8) rate; } } @@ -4268,25 +4245,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, fallthrough; case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; - int shift = 0; - int bitrate; - - switch (status->bw) { - case RATE_INFO_BW_10: - shift = 1; - break; - case RATE_INFO_BW_5: - shift = 2; - break; - } sband = local->hw.wiphy->bands[status->band]; - bitrate = sband->bitrates[status->rate_idx].bitrate; - ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift)); + ri.legacy = sband->bitrates[status->rate_idx].bitrate; if (status->flag & RX_FLAG_MACTIME_PLCP_START) { if (status->band == NL80211_BAND_5GHZ) { - ts += 20 << shift; + ts += 20; mpdu_offset += 2; } else if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) { ts += 96; -- cgit v1.2.3 From e04b1973e2ab1e58a79156317b0dc25f848efdc5 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 17 May 2023 08:33:10 -0400 Subject: wifi: lib80211: remove unused variables iv32 and iv16 clang with W=1 reports net/wireless/lib80211_crypt_tkip.c:667:7: error: variable 'iv32' set but not used [-Werror,-Wunused-but-set-variable] u32 iv32 = tkey->tx_iv32; ^ This variable not used so remove it. Then remove a similar iv16 variable. Change the comment because the unmodified value is returned. Signed-off-by: Tom Rix Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230517123310.873023-1-trix@redhat.com [change commit log wrt. 'length', add comment in the code] Signed-off-by: Johannes Berg --- net/wireless/lib80211_crypt_tkip.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 1b4d6c87a5c5..5c8cdf7681e3 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -662,12 +662,12 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) memcpy(key, tkey->key, TKIP_KEY_LEN); if (seq) { - /* Return the sequence number of the last transmitted frame. */ - u16 iv16 = tkey->tx_iv16; - u32 iv32 = tkey->tx_iv32; - if (iv16 == 0) - iv32--; - iv16--; + /* + * Not clear if this should return the value as is + * or - as the code previously seemed to partially + * have been written as - subtract one from it. It + * was working this way for a long time so leave it. + */ seq[0] = tkey->tx_iv16; seq[1] = tkey->tx_iv16 >> 8; seq[2] = tkey->tx_iv32; -- cgit v1.2.3 From 0cfaec25995ad3be316631b945be7ced81daa4e7 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 27 Jul 2023 10:40:56 -0700 Subject: wifi: nl80211: fixes to FILS discovery updates Add a new flag 'update' which is set to true during start_ap() if (and only if) one of the following two conditions are met: - Userspace passed an empty nested attribute which indicates that the feature should be disabled and templates deleted. - Userspace passed all the parameters for the nested attribute. Existing configuration will not be changed while the flag remains false. Add similar changes for unsolicited broadcast probe response transmission. 
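Illustrative sketch (editor's addition, not part of the patch): how a consumer of struct cfg80211_fils_discovery could interpret the new 'update' flag described above. The drv_*() helpers and struct drv_vif are hypothetical names; only the fields of struct cfg80211_fils_discovery come from the patch.

/*
 * Hypothetical driver-side handling of the new 'update' flag:
 *   update == false                      -> leave current configuration alone
 *   update == true, no/empty template    -> disable feature, drop template
 *   update == true, template present     -> (re)program the feature
 */
static int drv_apply_fils_discovery(struct drv_vif *vif,
				    const struct cfg80211_fils_discovery *fd)
{
	if (!fd->update)
		return 0;	/* nothing to change */

	if (!fd->tmpl || !fd->tmpl_len)
		return drv_disable_fils_discovery(vif);		/* hypothetical */

	return drv_set_fils_discovery(vif, fd->min_interval,	/* hypothetical */
				      fd->max_interval,
				      fd->tmpl, fd->tmpl_len);
}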
Signed-off-by: Aloka Dixit Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230727174100.11721-2-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 11 +++++++---- net/wireless/nl80211.c | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2d3fa4a29781..aa8c4538f93d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1338,6 +1338,7 @@ struct cfg80211_acl_data { * struct cfg80211_fils_discovery - FILS discovery parameters from * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. * + * @update: Set to true if the feature configuration should be updated. * @min_interval: Minimum packet interval in TUs (0 - 10000) * @max_interval: Maximum packet interval in TUs (0 - 10000) * @tmpl_len: Template length @@ -1345,6 +1346,7 @@ struct cfg80211_acl_data { * frame headers. */ struct cfg80211_fils_discovery { + bool update; u32 min_interval; u32 max_interval; size_t tmpl_len; @@ -1355,6 +1357,7 @@ struct cfg80211_fils_discovery { * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe * response parameters in 6GHz. * + * @update: Set to true if the feature configuration should be updated. * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive * scanning @@ -1362,6 +1365,7 @@ struct cfg80211_fils_discovery { * @tmpl: Template data for probe response */ struct cfg80211_unsol_bcast_probe_resp { + bool update; u32 interval; size_t tmpl_len; const u8 *tmpl; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b628126e06fa..f797ab7a6547 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2690,11 +2690,13 @@ enum nl80211_commands { * * @NL80211_ATTR_FILS_DISCOVERY: Optional parameter to configure FILS * discovery. It is a nested attribute, see - * &enum nl80211_fils_discovery_attributes. + * &enum nl80211_fils_discovery_attributes. Userspace should pass an empty + * nested attribute to disable this feature and delete the templates. * * @NL80211_ATTR_UNSOL_BCAST_PROBE_RESP: Optional parameter to configure * unsolicited broadcast probe response. It is a nested attribute, see - * &enum nl80211_unsol_bcast_probe_resp_attributes. + * &enum nl80211_unsol_bcast_probe_resp_attributes. Userspace should pass an empty + * nested attribute to disable this feature and delete the templates. * * @NL80211_ATTR_S1G_CAPABILITY: S1G Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -7606,7 +7608,7 @@ enum nl80211_iftype_akm_attributes { * @NL80211_FILS_DISCOVERY_ATTR_INT_MIN: Minimum packet interval (u32, TU). * Allowed range: 0..10000 (TU = Time Unit) * @NL80211_FILS_DISCOVERY_ATTR_INT_MAX: Maximum packet interval (u32, TU). - * Allowed range: 0..10000 (TU = Time Unit) + * Allowed range: 0..10000 (TU = Time Unit). If set to 0, the feature is disabled. * @NL80211_FILS_DISCOVERY_ATTR_TMPL: Template data for FILS discovery action * frame including the headers. * @@ -7639,7 +7641,8 @@ enum nl80211_fils_discovery_attributes { * * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT: Maximum packet interval (u32, TU). * Allowed range: 0..20 (TU = Time Unit). IEEE P802.11ax/D6.0 - * 26.17.2.3.2 (AP behavior for fast passive scanning). + * 26.17.2.3.2 (AP behavior for fast passive scanning). If set to 0, the feature is + * disabled. 
* @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL: Unsolicited broadcast probe response * frame template (binary). * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f2dd4c85a10f..53618a147907 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5647,6 +5647,13 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, if (ret) return ret; + if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] && + !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] && + !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) { + fd->update = true; + return 0; + } + if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] || !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] || !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) @@ -5656,7 +5663,7 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]); fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]); fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]); - + fd->update = true; return 0; } @@ -5679,6 +5686,12 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, if (ret) return ret; + if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] && + !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) { + presp->update = true; + return 0; + } + if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] || !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) return -EINVAL; @@ -5686,6 +5699,7 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]); + presp->update = true; return 0; } -- cgit v1.2.3 From 3b1c256eb4aedfc71dd97d5951ccff824b41d628 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 27 Jul 2023 10:40:57 -0700 Subject: wifi: mac80211: fixes in FILS discovery updates FILS discovery configuration gets updated only if the maximum interval is set to a non-zero value, hence there is no way to reset this value to 0 once set. Replace the check for interval with a new flag which is set only if the configuration should be updated. Add similar changes for the unsolicited broadcast probe response handling. 
Signed-off-by: Aloka Dixit Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230727174100.11721-3-quic_alokad@quicinc.com [move NULL'ing to else branch to not have intermediate NULL visible] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 77 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4f30e80192e7..e28a22ebe581 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -952,25 +952,29 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; - if (!params->tmpl || !params->tmpl_len) - return -EINVAL; + if (!params->update) + return 0; fd = &link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); - new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); - if (!new) - return -ENOMEM; - new->len = params->tmpl_len; - memcpy(new->data, params->tmpl, params->tmpl_len); - rcu_assign_pointer(link->u.ap.fils_discovery, new); - if (old) kfree_rcu(old, rcu_head); - return 0; + if (params->tmpl && params->tmpl_len) { + new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); + if (!new) + return -ENOMEM; + new->len = params->tmpl_len; + memcpy(new->data, params->tmpl, params->tmpl_len); + rcu_assign_pointer(link->u.ap.fils_discovery, new); + } else { + RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); + } + + return BSS_CHANGED_FILS_DISCOVERY; } static int @@ -981,23 +985,27 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, { struct unsol_bcast_probe_resp_data *new, *old = NULL; - if (!params->tmpl || !params->tmpl_len) - return -EINVAL; + if (!params->update) + return 0; - old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); - new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); - if (!new) - return -ENOMEM; - new->len = params->tmpl_len; - memcpy(new->data, params->tmpl, params->tmpl_len); - rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); + link_conf->unsol_bcast_probe_resp_interval = params->interval; + old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); - link_conf->unsol_bcast_probe_resp_interval = params->interval; + if (params->tmpl && params->tmpl_len) { + new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); + if (!new) + return -ENOMEM; + new->len = params->tmpl_len; + memcpy(new->data, params->tmpl, params->tmpl_len); + rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); + } else { + RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); + } - return 0; + return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; } static int ieee80211_set_ftm_responder_params( @@ -1428,23 +1436,18 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (err < 0) goto error; - if (params->fils_discovery.max_interval) { - err = ieee80211_set_fils_discovery(sdata, - ¶ms->fils_discovery, - link, link_conf); - if (err < 0) - goto error; - changed |= BSS_CHANGED_FILS_DISCOVERY; - } + err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, + link, link_conf); + if (err < 0) + goto error; + changed |= err; - if (params->unsol_bcast_probe_resp.interval) { - err = ieee80211_set_unsol_bcast_probe_resp(sdata, - ¶ms->unsol_bcast_probe_resp, - link, link_conf); - if (err < 0) - goto error; - changed |= 
BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; - } + err = ieee80211_set_unsol_bcast_probe_resp(sdata, + ¶ms->unsol_bcast_probe_resp, + link, link_conf); + if (err < 0) + goto error; + changed |= err; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { -- cgit v1.2.3 From 66f85d57b7109baf8a7d5ee04049ac9412611d35 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 27 Jul 2023 10:40:58 -0700 Subject: wifi: cfg80211: modify prototype for change_beacon Modify the prototype for change_beacon() in struct cfg80211_op to accept cfg80211_ap_settings instead of cfg80211_beacon_data so that it can process data in addition to beacons. Modify the prototypes of ieee80211_change_beacon() and driver specific functions accordingly. Signed-off-by: Aloka Dixit Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230727174100.11721-4-quic_alokad@quicinc.com [while at it, remove pointless "if (info)" check in tracing that just makes all the lines longer than they need be - it's never NULL] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 3 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 4 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 3 +- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 4 +- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 4 +- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 6 +- include/net/cfg80211.h | 2 +- net/mac80211/cfg.c | 14 ++-- net/wireless/nl80211.c | 16 +++-- net/wireless/rdev-ops.h | 2 +- net/wireless/trace.h | 74 +++++++++++----------- 12 files changed, 73 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 1dba55c2d9dc..eea60e2fca44 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2954,7 +2954,7 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, } static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *beacon) + struct cfg80211_ap_settings *params) { struct ath6kl_vif *vif = netdev_priv(dev); @@ -2964,7 +2964,7 @@ static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (vif->next_mode != AP_NETWORK) return -EOPNOTSUPP; - return ath6kl_set_ies(vif, beacon); + return ath6kl_set_ies(vif, ¶ms->beacon); } static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev, diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 40f9a7ef8980..dfbb478ae274 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2082,11 +2082,12 @@ void wil_cfg80211_ap_recovery(struct wil6210_priv *wil) static int wil_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_beacon_data *bcon) + struct cfg80211_ap_settings *params) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); struct wireless_dev *wdev = ndev->ieee80211_ptr; struct wil6210_vif *vif = ndev_to_vif(ndev); + struct cfg80211_beacon_data *bcon = ¶ms->beacon; int rc; u32 privacy = 0; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 94b4a7b8793d..9012456e1a18 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5415,13 +5415,13 @@ static int 
brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev, static s32 brcmf_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_beacon_data *info) + struct cfg80211_ap_settings *info) { struct brcmf_if *ifp = netdev_priv(ndev); brcmf_dbg(TRACE, "Enter\n"); - return brcmf_config_ap_mgmt_ie(ifp->vif, info); + return brcmf_config_ap_mgmt_ie(ifp->vif, &info->beacon); } static int diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index ba4e29713a8c..70473be42d7b 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1835,10 +1835,11 @@ static int mwifiex_cfg80211_set_cqm_rssi_config(struct wiphy *wiphy, */ static int mwifiex_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *data) + struct cfg80211_ap_settings *params) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); struct mwifiex_adapter *adapter = priv->adapter; + struct cfg80211_beacon_data *data = ¶ms->beacon; mwifiex_cancel_scan(adapter); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b545d93c6e37..3447470d3d02 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1441,11 +1441,11 @@ static int start_ap(struct wiphy *wiphy, struct net_device *dev, } static int change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *beacon) + struct cfg80211_ap_settings *params) { struct wilc_vif *vif = netdev_priv(dev); - return wilc_add_beacon(vif, 0, 0, beacon); + return wilc_add_beacon(vif, 0, 0, ¶ms->beacon); } static int stop_ap(struct wiphy *wiphy, struct net_device *dev, diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 73e6f9408b51..9388adcdcac1 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -331,11 +331,11 @@ out: } static int qtnf_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *info) + struct cfg80211_ap_settings *info) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - return qtnf_mgmt_set_appie(vif, info); + return qtnf_mgmt_set_appie(vif, &info->beacon); } static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 2ae7843abdf7..5ddc2d9a6060 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -2319,11 +2319,13 @@ static int cfg80211_rtw_start_ap(struct wiphy *wiphy, struct net_device *ndev, } static int cfg80211_rtw_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_beacon_data *info) + struct cfg80211_ap_settings *info) { struct adapter *adapter = rtw_netdev_priv(ndev); - return rtw_add_beacon(adapter, info->head, info->head_len, info->tail, info->tail_len); + return rtw_add_beacon(adapter, info->beacon.head, + info->beacon.head_len, info->beacon.tail, + info->beacon.tail_len); } static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aa8c4538f93d..d69841f64459 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4495,7 
+4495,7 @@ struct cfg80211_ops { int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *info); + struct cfg80211_ap_settings *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e28a22ebe581..e17ce9b8b8cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1477,10 +1477,12 @@ error: } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *params) + struct cfg80211_ap_settings *params) + { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; + struct cfg80211_beacon_data *beacon = ¶ms->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; @@ -1488,7 +1490,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, lockdep_assert_wiphy(wiphy); - link = sdata_dereference(sdata->link[params->link_id], sdata); + link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; @@ -1504,14 +1506,14 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL, + err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; - if (params->he_bss_color_valid && - params->he_bss_color.enabled != link_conf->he_bss_color.enabled) { - link_conf->he_bss_color.enabled = params->he_bss_color.enabled; + if (beacon->he_bss_color_valid && + beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { + link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 53618a147907..6449072e8def 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6198,7 +6198,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_beacon_data params; + struct cfg80211_ap_settings *params; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -6211,15 +6211,21 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->links[link_id].ap.beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(rdev, info->attrs, ¶ms, info->extack); + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + err = nl80211_parse_beacon(rdev, info->attrs, ¶ms->beacon, + info->extack); if (err) goto out; - err = rdev_change_beacon(rdev, dev, ¶ms); + err = rdev_change_beacon(rdev, dev, params); out: - kfree(params.mbssid_ies); - kfree(params.rnr_ies); + kfree(params->beacon.mbssid_ies); + kfree(params->beacon.rnr_ies); + kfree(params); return err; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 90bb7ac4b930..c6a2c07e380b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -173,7 +173,7 @@ static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_beacon_data *info) + struct cfg80211_ap_settings *info) { int ret; 
trace_rdev_change_beacon(&rdev->wiphy, dev, info); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 126c3a03e43e..1557dc1d58e2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -615,49 +615,47 @@ TRACE_EVENT(rdev_start_ap, TRACE_EVENT(rdev_change_beacon, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_beacon_data *info), + struct cfg80211_ap_settings *info), TP_ARGS(wiphy, netdev, info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) - __dynamic_array(u8, head, info ? info->head_len : 0) - __dynamic_array(u8, tail, info ? info->tail_len : 0) - __dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0) - __dynamic_array(u8, proberesp_ies, - info ? info->proberesp_ies_len : 0) - __dynamic_array(u8, assocresp_ies, - info ? info->assocresp_ies_len : 0) - __dynamic_array(u8, probe_resp, info ? info->probe_resp_len : 0) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - if (info) { - __entry->link_id = info->link_id; - if (info->head) - memcpy(__get_dynamic_array(head), info->head, - info->head_len); - if (info->tail) - memcpy(__get_dynamic_array(tail), info->tail, - info->tail_len); - if (info->beacon_ies) - memcpy(__get_dynamic_array(beacon_ies), - info->beacon_ies, info->beacon_ies_len); - if (info->proberesp_ies) - memcpy(__get_dynamic_array(proberesp_ies), - info->proberesp_ies, - info->proberesp_ies_len); - if (info->assocresp_ies) - memcpy(__get_dynamic_array(assocresp_ies), - info->assocresp_ies, - info->assocresp_ies_len); - if (info->probe_resp) - memcpy(__get_dynamic_array(probe_resp), - info->probe_resp, info->probe_resp_len); - } else { - __entry->link_id = -1; - } + __dynamic_array(u8, head, info->beacon.head_len) + __dynamic_array(u8, tail, info->beacon.tail_len) + __dynamic_array(u8, beacon_ies, info->beacon.beacon_ies_len) + __dynamic_array(u8, proberesp_ies, info->beacon.proberesp_ies_len) + __dynamic_array(u8, assocresp_ies, info->beacon.assocresp_ies_len) + __dynamic_array(u8, probe_resp, info->beacon.probe_resp_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->link_id = info->beacon.link_id; + if (info->beacon.head) + memcpy(__get_dynamic_array(head), + info->beacon.head, + info->beacon.head_len); + if (info->beacon.tail) + memcpy(__get_dynamic_array(tail), + info->beacon.tail, + info->beacon.tail_len); + if (info->beacon.beacon_ies) + memcpy(__get_dynamic_array(beacon_ies), + info->beacon.beacon_ies, + info->beacon.beacon_ies_len); + if (info->beacon.proberesp_ies) + memcpy(__get_dynamic_array(proberesp_ies), + info->beacon.proberesp_ies, + info->beacon.proberesp_ies_len); + if (info->beacon.assocresp_ies) + memcpy(__get_dynamic_array(assocresp_ies), + info->beacon.assocresp_ies, + info->beacon.assocresp_ies_len); + if (info->beacon.probe_resp) + memcpy(__get_dynamic_array(probe_resp), + info->beacon.probe_resp, + info->beacon.probe_resp_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) -- cgit v1.2.3 From b2d431d43c8a3e61a384e0b7b3c9d595ea77895d Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 27 Jul 2023 10:40:59 -0700 Subject: wifi: nl80211: additions to NL80211_CMD_SET_BEACON FILS discovery and unsolicited broadcast probe response templates need to be updated along with beacon templates in some cases such as the channel switch operation. Add the missing implementation. 
Signed-off-by: Aloka Dixit Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230727174100.11721-5-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6449072e8def..218093607b29 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6199,6 +6199,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings *params; + struct nlattr *attr; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -6220,6 +6221,20 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; + if (attr) { + err = nl80211_parse_fils_discovery(rdev, attr, params); + if (err) + goto out; + } + + attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]; + if (attr) { + err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr, params); + if (err) + goto out; + } + err = rdev_change_beacon(rdev, dev, params); out: -- cgit v1.2.3 From 6bc5ddb2fd0653a3e66a8e41fa4c20eced13e4d8 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 27 Jul 2023 10:41:00 -0700 Subject: wifi: mac80211: additions to change_beacon() Process FILS discovery and unsolicited broadcast probe response transmission configurations in ieee80211_change_beacon(). Signed-off-by: Aloka Dixit Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230727174100.11721-6-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e17ce9b8b8cb..b25f612b39eb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1511,6 +1511,19 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (err < 0) return err; + err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, + link, link_conf); + if (err < 0) + return err; + changed |= err; + + err = ieee80211_set_unsol_bcast_probe_resp(sdata, + ¶ms->unsol_bcast_probe_resp, + link, link_conf); + if (err < 0) + return err; + changed |= err; + if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; -- cgit v1.2.3 From 13ba6794d29ee273c26f26b6c7892797ac9957ae Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Fri, 21 Apr 2023 11:43:12 +0530 Subject: wifi: cfg80211: allow reg update by driver even if wiphy->regd is set Currently regulatory update by driver is not allowed when the wiphy->regd is already set and drivers_request->intersect is false. During wiphy registration, some drivers (ath10k does this currently) first register the world regulatory to cfg80211 using wiphy_apply_custom_regulatory(). The driver then obtain the current operating country and tries to update the correct regulatory to cfg80211 using regulatory_hint(). But at this point, wiphy->regd is already set to world regulatory. Also, since this is the first request from driver after the world regulatory is set this will result in drivers_request->intersect set to false. In this condition the driver request regulatory is not allowed to update to cfg80211 in reg_set_rd_driver(). This restricts the device operation to the world regulatory. 
This driver request to update the regulatory with current operating country is valid and should be updated to cfg80211. Hence allow regulatory update by driver even if the wiphy->regd is already set and driver_request->intersect is false. Signed-off-by: Raj Kumar Bhagat Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230421061312.13722-1-quic_rajkbhag@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/reg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f86ee1a6daad..8a99eafa4b62 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3827,7 +3827,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, { const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; - const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *tmp = NULL; struct wiphy *request_wiphy; if (is_world_regdom(rd->alpha2)) @@ -3850,10 +3850,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, if (!driver_request->intersect) { ASSERT_RTNL(); wiphy_lock(request_wiphy); - if (request_wiphy->regd) { - wiphy_unlock(request_wiphy); - return -EALREADY; - } + if (request_wiphy->regd) + tmp = get_wiphy_regdom(request_wiphy); regd = reg_copy_regd(rd); if (IS_ERR(regd)) { @@ -3862,6 +3860,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, } rcu_assign_pointer(request_wiphy->regd, regd); + rcu_free_regdom(tmp); wiphy_unlock(request_wiphy); reset_regdomains(false, rd); return 0; -- cgit v1.2.3 From b13b6bbfbb627884f18982600f7b5a5200652531 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Thu, 29 Jun 2023 03:52:54 +0000 Subject: wifi: cfg80211: call reg_call_notifier on beacon hints Currently the channel property updates are not propagated to driver. This causes issues in the discovery of hidden SSIDs and fails to connect to them. This change defines a new wiphy flag which when enabled by vendor driver, the reg_call_notifier callback will be trigger on beacon hints. This ensures that the channel property changes are visible to the vendor driver. The vendor changes the channels for active scans. This fixes the discovery issue of hidden SSID. Signed-off-by: Abhishek Kumar Link: https://lore.kernel.org/r/20230629035254.1.I059fe585f9f9e896c2d51028ef804d197c8c009e@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ net/wireless/reg.c | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d69841f64459..0551a38d6760 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4861,6 +4861,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for * NL80211_REGDOM_SET_BY_DRIVER. + * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver + * set this flag to update channels on beacon hints. 
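Illustrative sketch (editor's addition, not part of the patch): how a vendor driver could opt in to the new behaviour by setting the flag and providing a reg_notifier. my_reg_notifier() and my_setup_wiphy() are hypothetical names; the wiphy fields and the flag are from the patch.

static void my_reg_notifier(struct wiphy *wiphy,
			    struct regulatory_request *request)
{
	/* re-check channel flags here, e.g. to allow active scanning on
	 * channels that a beacon hint just cleared, so hidden SSIDs on
	 * those channels can be found
	 */
}

static void my_setup_wiphy(struct wiphy *wiphy)
{
	wiphy->reg_notifier = my_reg_notifier;
	/* with this flag set, the notifier also runs on beacon hints */
	wiphy->flags |= WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON;
}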
*/ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4887,6 +4889,7 @@ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24), + WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25), }; /** diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8a99eafa4b62..33e2570f2bd6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2151,6 +2151,13 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return false; } +static void reg_call_notifier(struct wiphy *wiphy, + struct regulatory_request *request) +{ + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, request); +} + static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { @@ -2158,6 +2165,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct ieee80211_channel *chan; bool channel_changed = false; struct ieee80211_channel chan_before; + struct regulatory_request *lr = get_last_request(); sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -2183,8 +2191,11 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, channel_changed = true; } - if (channel_changed) + if (channel_changed) { nl80211_send_beacon_hint_event(wiphy, &chan_before, chan); + if (wiphy->flags & WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON) + reg_call_notifier(wiphy, lr); + } } /* @@ -2327,13 +2338,6 @@ static void reg_process_ht_flags(struct wiphy *wiphy) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } -static void reg_call_notifier(struct wiphy *wiphy, - struct regulatory_request *request) -{ - if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, request); -} - static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { struct cfg80211_chan_def chandef = {}; -- cgit v1.2.3 From 30ca8b0c4d6c9fb1d76e5894b1e8bf7c6a12224d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 12 Sep 2023 10:48:55 +0530 Subject: wifi: cfg80211: export DFS CAC time and usable state helper functions cfg80211 has cfg80211_chandef_dfs_usable() function to know whether at least one channel in the chandef is in usable state or not. Also, cfg80211_chandef_dfs_cac_time() function is there which tells the CAC time required for the given chandef. Make these two functions visible to drivers by exporting their symbol to global list of kernel symbols. Lower level drivers can make use of these two functions to be aware if CAC is required on the given chandef and for how long. For example drivers which maintains the CAC state internally can make use of these. 
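As a hedged illustration of how a lower-level driver that tracks CAC internally might consume the two newly exported helpers (the my_cac_* structure and names are hypothetical; only cfg80211_chandef_dfs_usable() and cfg80211_chandef_dfs_cac_time() come from this change):

	static int my_cac_start(struct my_priv *priv,
				const struct cfg80211_chan_def *chandef)
	{
		unsigned int cac_time_ms;

		/* no CAC needed unless at least one channel is DFS-usable */
		if (!cfg80211_chandef_dfs_usable(priv->wiphy, chandef))
			return 0;

		cac_time_ms = cfg80211_chandef_dfs_cac_time(priv->wiphy, chandef);

		/* arm the driver's internal CAC timer for cac_time_ms milliseconds */
		return my_cac_arm_timer(priv, cac_time_ms);
	}
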
Signed-off-by: Aditya Kumar Singh Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20230912051857.2284-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 ++++++++++++++++++++++++ net/wireless/chan.c | 2 ++ net/wireless/core.h | 17 ----------------- 3 files changed, 26 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0551a38d6760..a5841a96284d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1002,6 +1002,30 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); +/** + * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we + * can/need start CAC on such channel + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Return: true if all channels available and at least + * one channel requires CAC (NL80211_DFS_USABLE) + */ +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + +/** + * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given + * channel definition + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Returns: DFS CAC time (in ms) which applies for this channel definition + */ +unsigned int +cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + /** * nl80211_send_chandef - sends the channel definition. * @msg: the msg to send channel definition diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 842190dfa100..2d21e423abdb 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -666,6 +666,7 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, return (r1 + r2 > 0); } +EXPORT_SYMBOL(cfg80211_chandef_dfs_usable); /* * Checks if center frequency of chan falls with in the bandwidth @@ -965,6 +966,7 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, return max(t1, t2); } +EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, diff --git a/net/wireless/core.h b/net/wireless/core.h index 98f41d9d2ba7..866f0a6934e6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -452,29 +452,12 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev); extern struct work_struct cfg80211_disconnect_work; -/** - * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable - * @wiphy: the wiphy to validate against - * @chandef: the channel definition to check - * - * Checks if chandef is usable and we can/need start CAC on such channel. 
- * - * Return: true if all channels available and at least - * one channel requires CAC (NL80211_DFS_USABLE) - */ -bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef); - void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); void cfg80211_dfs_channels_update_work(struct work_struct *work); -unsigned int -cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef); - void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); int -- cgit v1.2.3 From ef765c25875941564edf447302ff339363441e58 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 11 Sep 2023 18:50:16 -0300 Subject: net/sched: cls_route: make netlink errors meaningful Use netlink extended ack and parsing policies to return more meaningful errors instead of the relying solely on errnos. Reviewed-by: Victor Nogueira Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/cls_route.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1e20bbd687f1..1424bfeaca73 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -375,9 +375,9 @@ out: static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 }, - [TCA_ROUTE4_TO] = { .type = NLA_U32 }, - [TCA_ROUTE4_FROM] = { .type = NLA_U32 }, - [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, + [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF), }; static int route4_set_parms(struct net *net, struct tcf_proto *tp, @@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_ROUTE4_TO]) { - if (new && handle & 0x8000) + if (new && handle & 0x8000) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } to = nla_get_u32(tb[TCA_ROUTE4_TO]); - if (to > 0xFF) - return -EINVAL; nhandle = to; } + if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM], + "'from' and 'fromif' are mutually exclusive"); + return -EINVAL; + } + if (tb[TCA_ROUTE4_FROM]) { - if (tb[TCA_ROUTE4_IIF]) - return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); - if (id > 0xFF) - return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); - if (id > 0x7FFF) - return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; - if (nhandle != handle) + if (nhandle != handle) { + NL_SET_ERR_MSG_FMT(extack, + "Handle mismatch constructed: %x (expected: %x)", + handle, nhandle); return -EINVAL; + } } if (!nhandle) { @@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct route4_filter __rcu **fp; struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; - struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; int err; @@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (opt == NULL) + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing options"); return -EINVAL; + } - err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + err = nla_parse_nested_deprecated(tb, 
TCA_ROUTE4_MAX, tca[TCA_OPTIONS], route4_policy, NULL); if (err < 0) return err; -- cgit v1.2.3 From 3e99b4d282195435a9271fb738c4b146a9d35a5a Mon Sep 17 00:00:00 2001 From: Stephen Douthit Date: Mon, 13 Feb 2023 15:40:24 -0500 Subject: wifi: mac80211: Sanity check tx bitrate if not provided by driver If the driver doesn't fill NL80211_STA_INFO_TX_BITRATE in sta_set_sinfo() then as a fallback sta->deflink.tx_stats.last_rate is used. Unfortunately there's no guarantee that this has actually been set before it's used. Originally found when 'iw link' would always return a tx rate of 6Mbps regardless of actual link speed for the QCA9337 running firmware WLAN.TF.2.1-00021-QCARMSWP-1 in my netbook. Use the sanity check logic from ieee80211_fill_rx_status() and refactor that to use the new inline function. Signed-off-by: Stephen Douthit Link: https://lore.kernel.org/r/20230213204024.3377-1-stephen.douthit@gmail.com [change to bool ..._rate_valid() instead of int ..._rate_invalid()] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ net/mac80211/airtime.c | 2 +- net/mac80211/sta_info.c | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 09fe4601bf59..8d993f6ab919 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1078,6 +1078,11 @@ struct ieee80211_tx_rate { #define IEEE80211_MAX_TX_RETRY 31 +static inline bool ieee80211_rate_valid(struct ieee80211_tx_rate *rate) +{ + return rate->idx >= 0 && rate->count > 0; +} + static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate, u8 mcs, u8 nss) { diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 14be7b526f1d..fdf8b658fede 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -557,7 +557,7 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat, if (ieee80211_fill_rate_info(hw, stat, band, ri)) return 0; - if (rate->idx < 0 || !rate->count) + if (!ieee80211_rate_valid(rate)) return -1; if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7243c6aa2161..ba36fc29e532 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2703,7 +2703,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && - !sta->sta.valid_links) { + !sta->sta.valid_links && + ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) { sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); -- cgit v1.2.3 From e160ab85166e77347d0cbe5149045cb25e83937f Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 3 Feb 2023 10:36:36 +0800 Subject: wifi: mac80211: don't return unset power in ieee80211_get_tx_power() We can get a UBSAN warning if ieee80211_get_tx_power() returns the INT_MIN value mac80211 internally uses for "unset power level". UBSAN: signed-integer-overflow in net/wireless/nl80211.c:3816:5 -2147483648 * 100 cannot be represented in type 'int' CPU: 0 PID: 20433 Comm: insmod Tainted: G WC OE Call Trace: dump_stack+0x74/0x92 ubsan_epilogue+0x9/0x50 handle_overflow+0x8d/0xd0 __ubsan_handle_mul_overflow+0xe/0x10 nl80211_send_iface+0x688/0x6b0 [cfg80211] [...] cfg80211_register_wdev+0x78/0xb0 [cfg80211] cfg80211_netdev_notifier_call+0x200/0x620 [cfg80211] [...] 
ieee80211_if_add+0x60e/0x8f0 [mac80211] ieee80211_register_hw+0xda5/0x1170 [mac80211] In this case, simply return an error instead, to indicate that no data is available. Cc: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://lore.kernel.org/r/20230203023636.4418-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b25f612b39eb..e751d4eba8f5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3079,6 +3079,10 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, else *dbm = sdata->vif.bss_conf.txpower; + /* INT_MIN indicates no power level was set yet */ + if (*dbm == INT_MIN) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 8ecf0cedc08a3a8e663918d22f0350a928ac115c Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Mon, 11 Sep 2023 23:20:26 +0300 Subject: vsock: send SIGPIPE on write to shutdowned socket POSIX requires to send SIGPIPE on write to SOCK_STREAM socket which was shutdowned with SHUT_WR flag or its peer was shutdowned with SHUT_RD flag. Also we must not send SIGPIPE if MSG_NOSIGNAL flag is set. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 020cf17ab7e4..013b65241b65 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1921,6 +1921,9 @@ out_err: err = total_written; } out: + if (sk->sk_type == SOCK_STREAM) + err = sk_stream_error(sk, msg->msg_flags, err); + release_sock(sk); return err; } -- cgit v1.2.3 From 81b36803ac139827538ac5ce4028e750a3c53f53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:21 +0000 Subject: udp: introduce udp->udp_flags According to syzbot, it is time to use proper atomic flags for various UDP flags. Add udp_flags field, and convert udp->corkflag to first bit in it. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 28 +++++++++++++++++++++------- net/ipv4/udp.c | 12 ++++++------ net/ipv6/udp.c | 6 +++--- 3 files changed, 30 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..23f0693e0d9c 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,14 +32,20 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? 
*/ @@ -51,6 +57,11 @@ struct udp_sock { gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; +/* indicator bits used by pcflag: */ +#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ +#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ + __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +73,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,6 +100,15 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f39b9c844580..9709f8a532dc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) u8 tos, scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1315,11 +1315,11 @@ void udp_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_push_pending_frames(sk); release_sock(sk); } @@ -2658,9 +2658,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - WRITE_ONCE(up->corkflag, 1); + udp_set_bit(CORK, sk); } else { - WRITE_ONCE(up->corkflag, 0); + udp_clear_bit(CORK, sk); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2783,7 +2783,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = READ_ONCE(up->corkflag); + val = udp_test_bit(CORK, sk); break; case UDP_ENCAP: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 86b5d509a468..0c6973cd22ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1332,7 +1332,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1644,11 +1644,11 @@ static void udpv6_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if 
(!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } -- cgit v1.2.3 From a0002127cd746fcaa182ad3386ef6931c37f3bda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:22 +0000 Subject: udp: move udp->no_check6_tx to udp->udp_flags syzbot reported that udp->no_check6_tx can be read locklessly. Use one atomic bit from udp->udp_flags Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 23f0693e0d9c..e3f2a6c7ac1d 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,6 +34,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ }; struct udp_sock { @@ -47,8 +48,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set @@ -115,7 +115,7 @@ struct udp_sock { static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) @@ -123,9 +123,9 @@ static inline void udp_set_no_check6_rx(struct sock *sk, bool val) udp_sk(sk)->no_check6_rx = val; } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } static inline bool udp_get_no_check6_rx(struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9709f8a532dc..0c6998291c99 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2694,7 +2694,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - up->no_check6_tx = valbool; + udp_set_no_check6_tx(sk, valbool); break; case UDP_NO_CHECK6_RX: @@ -2791,7 +2791,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - val = up->no_check6_tx; + val = udp_get_no_check6_tx(sk); break; case UDP_NO_CHECK6_RX: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0c6973cd22ce..469df0ca561f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1241,7 +1241,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (udp_sk(sk)->no_check6_tx) { + if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } @@ -1262,7 +1262,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, if (is_udplite) csum = udplite_csum(skb); - else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ + else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if 
(skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ -- cgit v1.2.3 From bcbc1b1de884647aa0318bf74eb7f293d72a1e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:23 +0000 Subject: udp: move udp->no_check6_rx to udp->udp_flags syzbot reported that udp->no_check6_rx can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index e3f2a6c7ac1d..8d4c3835b1b2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -35,6 +35,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ }; struct udp_sock { @@ -48,8 +49,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this @@ -120,7 +120,7 @@ static inline void udp_set_no_check6_tx(struct sock *sk, bool val) static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } static inline bool udp_get_no_check6_tx(const struct sock *sk) @@ -128,9 +128,9 @@ static inline bool udp_get_no_check6_tx(const struct sock *sk) return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c6998291c99..cb32826a1db2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2698,7 +2698,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_RX: - up->no_check6_rx = valbool; + udp_set_no_check6_rx(sk, valbool); break; case UDP_SEGMENT: @@ -2795,7 +2795,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_RX: - val = up->no_check6_rx; + val = udp_get_no_check6_rx(sk); break; case UDP_SEGMENT: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 469df0ca561f..6e1ea3029260 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -858,7 +858,7 @@ start_lookup: /* If zero checksum and no_check is not on for * the socket then skip it. 
*/ - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; @@ -980,7 +980,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); - if (!uh->check && !udp_sk(sk)->no_check6_rx) { + if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; @@ -1002,7 +1002,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } -- cgit v1.2.3 From e1dc0615c6b08ef36414f08c011965b8fb56198b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:24 +0000 Subject: udp: move udp->gro_enabled to udp->udp_flags syzbot reported that udp->gro_enabled can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 2 +- net/ipv4/udp.c | 6 +++--- net/ipv4/udp_offload.c | 4 ++-- net/ipv6/udp.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 8d4c3835b1b2..b344bd2e41fc 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -36,6 +36,7 @@ enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ }; struct udp_sock { @@ -54,7 +55,6 @@ struct udp_sock { * different encapsulation layer set * this */ - gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; /* indicator bits used by pcflag: */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cb32826a1db2..1debc10a0f02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1868,7 +1868,7 @@ try_again: (struct sockaddr *)sin); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (inet_cmsg_flags(inet)) @@ -2713,7 +2713,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); - up->gro_enabled = valbool; + udp_assign_bit(GRO_ENABLED, sk, valbool); up->accept_udp_l4 = valbool; release_sock(sk); break; @@ -2803,7 +2803,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - val = up->gro_enabled; + val = udp_test_bit(GRO_ENABLED, sk); break; /* The following two cannot be changed on UDP sockets, the return is diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0f46b3c2e4ac..6c95d28d0c4a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; + NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_test_bit(GRO_ENABLED, sk) : 1; if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist) return call_gro_receive(udp_gro_receive_segment, head, skb); /* no GRO, be sure flush the current packet */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6e1ea3029260..2c3281879b6d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -413,7 +413,7 @@ try_again: (struct sockaddr *)sin6); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) -- cgit v1.2.3 From 6d5a12eb91224d707f8691dccb40a5719fe5466d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:25 +0000 Subject: udp: add missing WRITE_ONCE() around up->encap_rcv UDP_ENCAP_ESPINUDP_NON_IKE setsockopt() writes over up->encap_rcv while other cpus read it. Fixes: 067b207b281d ("[UDP]: Cleanup UDP encapsulation code") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/ipv4/udp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1debc10a0f02..db43907b9a3e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2675,10 +2675,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + ipv6_stub->xfrm6_udp_encap_rcv); else #endif - up->encap_rcv = xfrm4_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + xfrm4_udp_encap_rcv); #endif fallthrough; case UDP_ENCAP_L2TPINUDP: -- cgit v1.2.3 From f5f52f0884a595ff99ab1a608643fe4025fca2d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:26 +0000 Subject: udp: move udp->accept_udp_{l4|fraglist} to udp->udp_flags These are read locklessly, move them to udp_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 16 +++++++++------- net/ipv4/udp.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index b344bd2e41fc..bb2b87adfbea 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -37,6 +37,8 @@ enum { UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, }; struct udp_sock { @@ -50,13 +52,11 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? 
*/ - unsigned char encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1; /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ - accept_udp_l4:1, - accept_udp_fraglist:1; /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -149,10 +149,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -160,8 +162,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db43907b9a3e..75ba86a87bb6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2716,7 +2716,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (valbool) udp_tunnel_encap_enable(sk->sk_socket); udp_assign_bit(GRO_ENABLED, sk, valbool); - up->accept_udp_l4 = valbool; + udp_assign_bit(ACCEPT_L4, sk, valbool); release_sock(sk); break; -- cgit v1.2.3 From ac9a7f4ce5dda1472e8f44096f33066c6ec1a3b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:27 +0000 Subject: udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO Move udp->encap_enabled to udp->udp_flags. Add udp_test_and_set_bit() helper to allow lockless udp_tunnel_encap_enable() implementation. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 9 ++++----- include/net/udp_tunnel.h | 9 +++------ net/ipv4/udp.c | 10 +++------- net/ipv4/udp_tunnel_core.c | 2 +- net/ipv6/udp.c | 2 +- 5 files changed, 12 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index bb2b87adfbea..0cf83270a4a2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -39,6 +39,7 @@ enum { UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ }; struct udp_sock { @@ -52,11 +53,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? 
*/ - unsigned char encap_enabled:1; /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ + /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -104,6 +101,8 @@ struct udp_sock { test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_set_bit(nr, sk) \ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_clear_bit(nr, sk) \ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_assign_bit(nr, sk, val) \ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..29251c3519cf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 75ba86a87bb6..637a4faf9aff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2618,7 +2618,7 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); } } @@ -2685,9 +2685,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, fallthrough; case UDP_ENCAP_L2TPINUDP: up->encap_type = val; - lock_sock(sk); - udp_tunnel_encap_enable(sk->sk_socket); - release_sock(sk); + udp_tunnel_encap_enable(sk); break; default: err = -ENOPROTOOPT; @@ -2710,14 +2708,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - lock_sock(sk); /* when enabling GRO, accept the related GSO packet type */ if (valbool) - udp_tunnel_encap_enable(sk->sk_socket); + udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); - release_sock(sk); break; /* diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9b18f371af0d..1e7e4aecdc48 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; - udp_tunnel_encap_enable(sock); + udp_tunnel_encap_enable(sk); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2c3281879b6d..90688877e900 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1670,7 +1670,7 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) { + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); } -- cgit v1.2.3 From 70a36f571362a8de8b8c02d21ae524fc776287f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:28 +0000 Subject: udp: annotate data-races around udp->encap_type syzbot/KCSAN complained about UDP_ENCAP_L2TPINUDP setsockopt() racing. 
Add READ_ONCE()/WRITE_ONCE() to document races on this lockless field. syzbot report was: BUG: KCSAN: data-race in udp_lib_setsockopt / udp_lib_setsockopt read-write to 0xffff8881083603fa of 1 bytes by task 16557 on cpu 0: udp_lib_setsockopt+0x682/0x6c0 udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2779 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read-write to 0xffff8881083603fa of 1 bytes by task 16554 on cpu 1: udp_lib_setsockopt+0x682/0x6c0 udp_setsockopt+0x73/0xa0 net/ipv4/udp.c:2779 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x01 -> 0x05 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 16554 Comm: syz-executor.5 Not tainted 6.5.0-rc7-syzkaller-00004-gf7757129e3de #0 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- drivers/net/gtp.c | 4 ++-- net/ipv4/udp.c | 9 +++++---- net/ipv4/xfrm4_input.c | 4 ++-- net/ipv6/udp.c | 5 +++-- net/ipv6/xfrm6_input.c | 4 ++-- net/l2tp/l2tp_core.c | 6 +++--- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 144ec626230d..b3aa0c3d5826 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk) gtp->sk0 = NULL; else gtp->sk1u = NULL; - udp_sk(sk)->encap_type = 0; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); rcu_assign_sk_user_data(sk, NULL); release_sock(sk); sock_put(sk); @@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - switch (udp_sk(sk)->encap_type) { + switch (READ_ONCE(udp_sk(sk)->encap_type)) { case UDP_ENCAP_GTP0: netdev_dbg(gtp->dev, "received GTP0 packet\n"); ret = gtp0_udp_encap_recv(gtp, skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 637a4faf9aff..2eeab4af17a1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, @@ -2081,7 +2081,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udp_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -2684,7 +2685,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #endif fallthrough; case UDP_ENCAP_L2TPINUDP: - up->encap_type = val; + 
WRITE_ONCE(up->encap_type, val); udp_tunnel_encap_enable(sk); break; default: @@ -2785,7 +2786,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_ENCAP: - val = up->encap_type; + val = READ_ONCE(up->encap_type); break; case UDP_NO_CHECK6_TX: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eac206a290d0..183f6dc37242 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90688877e900..0e79d189613b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -571,7 +571,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, @@ -688,7 +688,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udpv6_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4907ab241d6b..4156387248e4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct ipv6hdr *ip6h; int len; int ip6hlen = sizeof(struct ipv6hdr); - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_udp_encap_rcv(sk, skb); + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 03608d3ded4b..8d21ff25f160 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk) switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; - (udp_sk(sk))->encap_destroy = NULL; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); + udp_sk(sk)->encap_rcv = NULL; + udp_sk(sk)->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; -- cgit v1.2.3 From 729549aa350c56a777bb342941ed4d69b6585769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:29 +0000 Subject: udplite: remove UDPLITE_BIT This flag is set but never read, we can remove it. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 5 ++--- net/ipv4/udplite.c | 1 - net/ipv6/udplite.c | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 0cf83270a4a2..58156edec009 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,9 +55,8 @@ struct udp_sock { __u8 encap_type; /* Is this an Encapsulation socket? */ /* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ +#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39ecdad1b50c..af37af3ab727 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table); static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 267d491e9707..a60bec9b14f1 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,7 +17,6 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; -- cgit v1.2.3 From 882af43a0fc37e26d85fb0df0c9edd3bed928de4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:30 +0000 Subject: udplite: fix various data-races udp->pcflag, udp->pcslen and udp->pcrlen reads/writes are racy. Move udp->pcflag to udp->udp_flags for atomicity, and add READ_ONCE()/WRITE_ONCE() annotations for pcslen and pcrlen. Fixes: ba4e58eca8aa ("[NET]: Supporting UDP-Lite (RFC 3828) in Linux") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 6 ++---- include/net/udplite.h | 14 +++++++++----- net/ipv4/udp.c | 21 +++++++++++---------- net/ipv6/udp.c | 9 +++++---- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 58156edec009..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -40,6 +40,8 @@ enum { UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; struct udp_sock { @@ -54,10 +56,6 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ -/* indicator bits used by pcflag: */ -#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2eeab4af17a1..c3ff984b6354 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2120,7 +2120,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); /* * MIB statistics other than incrementing the error count are @@ -2133,7 +2134,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * delivery of packets with coverage values less than a value * provided by the application." */ - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; @@ -2144,9 +2145,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -2729,8 +2730,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcslen = val; - up->pcflag |= UDPLITE_SEND_CC; + WRITE_ONCE(up->pcslen, val); + udp_set_bit(UDPLITE_SEND_CC, sk); break; /* The receiver specifies a minimum checksum coverage value. To make @@ -2743,8 +2744,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcrlen = val; - up->pcflag |= UDPLITE_RECV_CC; + WRITE_ONCE(up->pcrlen, val); + udp_set_bit(UDPLITE_RECV_CC, sk); break; default: @@ -2808,11 +2809,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). 
*/ case UDPLITE_SEND_CSCOV: - val = up->pcslen; + val = READ_ONCE(up->pcslen); break; case UDPLITE_RECV_CSCOV: - val = up->pcrlen; + val = READ_ONCE(up->pcrlen); break; default: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0e79d189613b..f60ba4295435 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -727,16 +727,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } -- cgit v1.2.3 From a613ed1afd9696ab6d235c0dab5f557863b94bc8 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Tue, 12 Sep 2023 08:42:34 +0000 Subject: ipv4: igmp: Remove redundant comparison in igmp_mcf_get_next() The 'state->im' value will always be non-zero after the 'while' statement, so the check can be removed. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Gavrilov Ilia Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230912084039.1501984-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Paolo Abeni --- net/ipv4/igmp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 418e5fb58fd3..76c3ea75b8dd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2944,8 +2944,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; spin_lock_bh(&state->im->lock); psf = state->im->sources; } -- cgit v1.2.3 From 59bb1d698028d7f01650f9def579efdfdfb7039f Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Tue, 12 Sep 2023 08:42:49 +0000 Subject: ipv6: mcast: Remove redundant comparison in igmp6_mcf_get_next() The 'state->im' value will always be non-zero after the 'while' statement, so the check can be removed. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230912084100.1502379-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Paolo Abeni --- net/ipv6/mcast.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce25bcb9974..421264a69e97 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -3011,8 +3011,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; psf = rcu_dereference(state->im->mca_sources); } out: -- cgit v1.2.3 From b0adfba7ee770fef20b1b6d86706c28f7fccfb07 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:01:59 +0000 Subject: ipv6: lockless IPV6_UNICAST_HOPS implementation Some np->hop_limit accesses are racy, when socket lock is not held. 
Add missing annotations and switch to full lockless implementation. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 +----------- include/net/ipv6.h | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 20 +++++++++++--------- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 2 +- 6 files changed, 16 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index af8a771a053c..c2e087071384 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -213,17 +213,7 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c6932d1a3fa8..2e8e7e31e02e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -916,7 +916,7 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, if (ipv6_addr_is_multicast(&fl6->daddr)) hlimit = np->mcast_hops; else - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54fc4c711f2c..1e16d56d8c38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -309,7 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0e2a0847b387..f27993a1470d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -415,6 +415,16 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. 
*/ + switch (optname) { + case IPV6_UNICAST_HOPS: + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->hop_limit, val); + return 0; + } if (needs_rtnl) rtnl_lock(); sockopt_lock_sock(sk); @@ -733,14 +743,6 @@ done: } break; } - case IPV6_UNICAST_HOPS: - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->hop_limit = val; - retv = 0; - break; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) @@ -1347,7 +1349,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) - val = np->hop_limit; + val = READ_ONCE(np->hop_limit); else val = np->mcast_hops; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 421264a69e97..4a7967623909 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, hdr->payload_len = htons(len); hdr->nexthdr = proto; - hdr->hop_limit = inet6_sk(sk)->hop_limit; + hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 553c8664e0a7..b554fd40bdc3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, csum_partial(icmp6h, skb->len, 0)); - ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); + ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); -- cgit v1.2.3 From d986f52124e062753e33b6fe303be5904a997eac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:00 +0000 Subject: ipv6: lockless IPV6_MULTICAST_LOOP implementation Add inet6_{test|set|clear|assign}_bit() helpers. Note that I am using bits from inet->inet_flags, this might change in the future if we need more flags. While solving data-races accessing np->mc_loop, this patch also allows to implement lockless accesses to np->mcast_hops in the following patch. Also constify sk_mc_loop() argument. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 18 ++++++++++++++---- include/net/inet_sock.h | 1 + include/net/sock.h | 2 +- net/core/sock.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/ipv6_sockglue.c | 18 ++++++++---------- net/ipv6/ndisc.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 8 ++------ 8 files changed, 30 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c2e087071384..68cf1ca94914 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -218,11 +218,9 @@ struct ipv6_pinfo { #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; + __u16 __unused_2:7, #else - __u16 mc_loop:1, - __unused_2:6; + __u16 __unused_2:7; __s16 mcast_hops:9; #endif int ucast_oif; @@ -283,6 +281,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. 
+ */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a027..b5a9dca92fb4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -268,6 +268,7 @@ enum { INET_FLAGS_NODEFRAG = 17, INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, + INET_FLAGS_MC6_LOOP = 20, }; /* cmsg flags for inet */ diff --git a/include/net/sock.h b/include/net/sock.h index 676146e9d181..56ac1abadea5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2238,7 +2238,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) } } -bool sk_mc_loop(struct sock *sk); +bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { diff --git a/net/core/sock.c b/net/core/sock.c index bb89b88bc1e8..213a62ac13f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -759,7 +759,7 @@ out: return ret; } -bool sk_mc_loop(struct sock *sk) +bool sk_mc_loop(const struct sock *sk) { if (dev_recursion_level()) return false; @@ -771,7 +771,7 @@ bool sk_mc_loop(struct sock *sk) return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - return inet6_sk(sk)->mc_loop; + return inet6_test_bit(MC6_LOOP, sk); #endif } WARN_ON_ONCE(1); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 368824fe9719..bbd4aa1b96d0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -217,7 +217,7 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; - np->mc_loop = 1; + inet6_set_bit(MC6_LOOP, sk); np->mc_all = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f27993a1470d..755fac85a120 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -424,6 +424,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(np->hop_limit, val); return 0; + case IPV6_MULTICAST_LOOP: + if (optlen < sizeof(int)) + return -EINVAL; + if (val != valbool) + return -EINVAL; + inet6_assign_bit(MC6_LOOP, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -755,15 +762,6 @@ done: retv = 0; break; - case IPV6_MULTICAST_LOOP: - if (optlen < sizeof(int)) - goto e_inval; - if (val != valbool) - goto e_inval; - np->mc_loop = valbool; - retv = 0; - break; - case IPV6_UNICAST_IF: { struct net_device *dev = NULL; @@ -1367,7 +1365,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_MULTICAST_LOOP: - val = np->mc_loop; + val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b554fd40bdc3..679443d7ecb5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net) np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ - np->mc_loop = 0; + inet6_clear_bit(MC6_LOOP, sk); 
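Stepping back, the reason these one-bit fields migrate out of the ipv6_pinfo bitfield is that bitfield members sharing a storage unit cannot be updated atomically, so an unlocked writer can corrupt neighbouring bits; the inet6_*_bit() helpers above turn each update into a single atomic bit operation on inet_flags. A rough userspace analogy using C11 atomics, a sketch only and not the kernel helpers:

#include <stdatomic.h>
#include <stdbool.h>

/* bit number mirrors INET_FLAGS_MC6_LOOP above; purely illustrative */
enum { FLAG_MC6_LOOP = 20 };

static _Atomic unsigned long sock_flags;

static inline void flag_set(int nr)
{
        atomic_fetch_or(&sock_flags, 1UL << nr);
}

static inline void flag_clear(int nr)
{
        atomic_fetch_and(&sock_flags, ~(1UL << nr));
}

static inline bool flag_test(int nr)
{
        return atomic_load(&sock_flags) & (1UL << nr);
}

static inline void flag_assign(int nr, bool val)
{
        if (val)
                flag_set(nr);
        else
                flag_clear(nr);
}

The same pattern is reused by the later patches in this series for MC6_ALL, RECVERR6, REPFLOW and the other flags that leave the ipv6_pinfo bitfield.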
return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index da5af28ff57b..3c2251cabd04 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val) static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ - lock_sock(sk); inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 - if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - + if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ - np->mc_loop = loop ? 1 : 0; + inet6_assign_bit(MC6_LOOP, sk, loop); } #endif - release_sock(sk); } /* -- cgit v1.2.3 From 2da23eb07c91241d962f3ff05565065484cd8929 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:01 +0000 Subject: ipv6: lockless IPV6_MULTICAST_HOPS implementation This fixes data-races around np->mcast_hops, and make IPV6_MULTICAST_HOPS lockless. Note that np->mcast_hops is never negative, thus can fit an u8 field instead of s16. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 9 +-------- include/net/ipv6.h | 2 +- net/dccp/ipv6.c | 2 +- net/ipv6/ipv6_sockglue.c | 28 +++++++++++++++------------- net/ipv6/tcp_ipv6.c | 3 ++- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 6 files changed, 21 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68cf1ca94914..9cc278b5e4f4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,15 +214,8 @@ struct ipv6_pinfo { __u32 frag_size; s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:7, -#else - __u16 __unused_2:7; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2e8e7e31e02e..8a04a8985336 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -914,7 +914,7 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) - hlimit = np->mcast_hops; + hlimit = READ_ONCE(np->mcast_hops); else hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 33f6ccf6ba77..83617a16b98e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -676,7 +676,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 755fac85a120..5fff19a87c75 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -431,6 +431,16 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(MC6_LOOP, sk, valbool); return 0; + case IPV6_MULTICAST_HOPS: + if (sk->sk_type == SOCK_STREAM) + return retv; + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->mcast_hops, + val == -1 ? 
IPV6_DEFAULT_MCASTHOPS : val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -751,16 +761,6 @@ done: break; } - case IPV6_MULTICAST_HOPS: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); - retv = 0; - break; case IPV6_UNICAST_IF: { @@ -1180,7 +1180,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { @@ -1197,7 +1198,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { @@ -1349,7 +1351,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_UNICAST_HOPS) val = READ_ONCE(np->hop_limit); else - val = np->mcast_hops; + val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3a88545a265d..54db5fab318b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1542,7 +1542,8 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, + ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3c2251cabd04..df1b33b61059 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1322,7 +1322,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ - np->mcast_hops = ttl; + WRITE_ONCE(np->mcast_hops, ttl); } #endif release_sock(sk); -- cgit v1.2.3 From 15f926c4457aa65b1ac83bda1bbdcaad3f48e4e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:02 +0000 Subject: ipv6: lockless IPV6_MTU implementation np->frag_size can be read/written without holding socket lock. Add missing annotations and make IPV6_MTU setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 19 +++++++++++-------- net/ipv6/ipv6_sockglue.c | 15 +++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1e16d56d8c38..ab7ede4a731a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -881,9 +881,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, mtu = IPV6_MIN_MTU; } - if (np && np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; + if (np) { + u32 frag_size = READ_ONCE(np->frag_size); + + if (frag_size && frag_size < mtu) + mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; @@ -1392,7 +1394,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); - unsigned int mtu; + unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so @@ -1441,10 +1443,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } + + frag_size = READ_ONCE(np->frag_size); + if (frag_size && frag_size < mtu) + mtu = frag_size; + cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5fff19a87c75..3b2a34828daa 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -441,6 +441,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(np->mcast_hops, val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); return 0; + case IPV6_MTU: + if (optlen < sizeof(int)) + return -EINVAL; + if (val && val < IPV6_MIN_MTU) + return -EINVAL; + WRITE_ONCE(np->frag_size, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -910,14 +917,6 @@ done: np->pmtudisc = val; retv = 0; break; - case IPV6_MTU: - if (optlen < sizeof(int)) - goto e_inval; - if (val && val < IPV6_MIN_MTU) - goto e_inval; - np->frag_size = val; - retv = 0; - break; case IPV6_RECVERR: if (optlen < sizeof(int)) goto e_inval; -- cgit v1.2.3 From 273784d3c5741522199011772651dbb50db8c810 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:03 +0000 Subject: ipv6: lockless IPV6_MINHOPCOUNT implementation Add one missing READ_ONCE() annotation in do_ipv6_getsockopt() and make IPV6_MINHOPCOUNT setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3b2a34828daa..bbc8a009e05d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -448,6 +448,20 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(np->frag_size, val); return 0; + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 255) + return -EINVAL; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. 
+ */ + WRITE_ONCE(np->min_hopcount, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -947,21 +961,6 @@ done: goto e_inval; retv = __ip6_sock_set_addr_preferences(sk, val); break; - case IPV6_MINHOPCOUNT: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 255) - goto e_inval; - - if (val) - static_branch_enable(&ip6_min_hopcount); - - /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount - * while we are changing it. - */ - WRITE_ONCE(np->min_hopcount, val); - retv = 0; - break; case IPV6_DONTFRAG: np->dontfrag = valbool; retv = 0; @@ -1443,7 +1442,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MINHOPCOUNT: - val = np->min_hopcount; + val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: -- cgit v1.2.3 From dcae74622c051b219ee628669a31716473efda2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:04 +0000 Subject: ipv6: lockless IPV6_RECVERR_RFC4884 implementation Move np->recverr_rfc4884 to an atomic flag to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/ipv6/datagram.c | 2 +- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9cc278b5e4f4..0d2b0a1b2dae 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,7 +256,6 @@ struct ipv6_pinfo { autoflowlabel:1, autoflowlabel_set:1, mc_all:1, - recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b5a9dca92fb4..8cf1f7b44234 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -269,6 +269,7 @@ enum { INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, INET_FLAGS_MC6_LOOP = 20, + INET_FLAGS_RECVERR6_RFC4884 = 21, }; /* cmsg flags for inet */ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 41ebc4e57473..e81892814935 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __skb_pull(skb, payload - skb->data); - if (inet6_sk(sk)->recverr_rfc4884) + if (inet6_test_bit(RECVERR6_RFC4884, sk)) ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bbc8a009e05d..b65e73ac2ccd 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -462,6 +462,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ WRITE_ONCE(np->min_hopcount, val); return 0; + case IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 1) + return -EINVAL; + inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -974,14 +981,6 @@ done: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; - case IPV6_RECVERR_RFC4884: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 1) - goto e_inval; - np->recverr_rfc4884 = valbool; - retv = 0; - break; } unlock: @@ -1462,7 +1461,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVERR_RFC4884: - val = np->recverr_rfc4884; + val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: -- cgit v1.2.3 From 6559c0ff3bc27d7e4d447d31c1d7e8eae0e959f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:05 +0000 
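For reference, a minimal userspace sketch of the IPV6_MINHOPCOUNT option made lockless above (illustrative only; the fallback define covers libc headers that predate the option):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPV6_MINHOPCOUNT
#define IPV6_MINHOPCOUNT 73     /* value from include/uapi/linux/in6.h */
#endif

int main(void)
{
        int fd = socket(AF_INET6, SOCK_STREAM, 0);
        int minhops = 255;      /* GTSM-style: accept only directly connected peers */

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        /* with the patch above this is a plain WRITE_ONCE(np->min_hopcount, val),
         * no socket lock taken */
        if (setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &minhops, sizeof(minhops)))
                perror("setsockopt");
        close(fd);
        return 0;
}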
Subject: ipv6: lockless IPV6_MULTICAST_ALL implementation Move np->mc_all to an atomic flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/ipv6/af_inet6.c | 2 +- net/ipv6/ipv6_sockglue.c | 14 ++++++-------- net/ipv6/mcast.c | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0d2b0a1b2dae..d88e91b7f0a3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -255,7 +255,6 @@ struct ipv6_pinfo { dontfrag:1, autoflowlabel:1, autoflowlabel_set:1, - mc_all:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8cf1f7b44234..97e70a97dae8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -270,6 +270,7 @@ enum { INET_FLAGS_DEFER_CONNECT = 19, INET_FLAGS_MC6_LOOP = 20, INET_FLAGS_RECVERR6_RFC4884 = 21, + INET_FLAGS_MC6_ALL = 22, }; /* cmsg flags for inet */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bbd4aa1b96d0..372fb7b9112c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -218,7 +218,7 @@ lookup_protocol: np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; inet6_set_bit(MC6_LOOP, sk); - np->mc_all = 1; + inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b65e73ac2ccd..7a181831f226 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -469,6 +469,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); return 0; + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(MC6_ALL, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -890,13 +895,6 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } - case IPV6_MULTICAST_ALL: - if (optlen < sizeof(int)) - goto e_inval; - np->mc_all = valbool; - retv = 0; - break; - case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) @@ -1372,7 +1370,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_ALL: - val = np->mc_all; + val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4a7967623909..99e28b444a4c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return np->mc_all; + return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { -- cgit v1.2.3 From 5121516b0c4736b7977d977b239e36d23ec64401 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:06 +0000 Subject: ipv6: lockless IPV6_AUTOFLOWLABEL implementation Move np->autoflowlabel and np->autoflowlabel_set in inet->inet_flags, to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 2 -- include/net/inet_sock.h | 2 ++ include/net/ipv6.h | 2 +- net/ipv6/ip6_output.c | 12 +++++------- net/ipv6/ipv6_sockglue.c | 11 +++++------ 5 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d88e91b7f0a3..e3be5dc21b7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -253,8 +253,6 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 97e70a97dae8..f1af64a40673 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -271,6 +271,8 @@ enum { INET_FLAGS_MC6_LOOP = 20, INET_FLAGS_RECVERR6_RFC4884 = 21, INET_FLAGS_MC6_ALL = 22, + INET_FLAGS_AUTOFLOWLABEL_SET = 23, + INET_FLAGS_AUTOFLOWLABEL = 24, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8a04a8985336..4b6cbec059e2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); +bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab7ede4a731a..47aa42f93ccd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -232,12 +232,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -314,7 +313,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1938,7 +1937,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1981,7 +1979,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7a181831f226..d5d428a695f7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -474,6 +474,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(MC6_ALL, sk, valbool); return 0; + case IPV6_AUTOFLOWLABEL: + inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); + inet6_set_bit(AUTOFLOWLABEL_SET, sk); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -970,11 +974,6 @@ done: np->dontfrag = valbool; retv = 0; break; - 
case IPV6_AUTOFLOWLABEL: - np->autoflowlabel = valbool; - np->autoflowlabel_set = 1; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1447,7 +1446,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = ip6_autoflowlabel(sock_net(sk), np); + val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: -- cgit v1.2.3 From 1086ca7cce292bb498d7f8f85f4593c9ef4902b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:07 +0000 Subject: ipv6: lockless IPV6_DONTFRAG implementation Move np->dontfrag flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + include/net/ipv6.h | 6 +++--- include/net/xfrm.h | 2 +- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 9 ++++----- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 11 files changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e3be5dc21b7d..57d563f1d4b1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -252,7 +252,6 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f1af64a40673..ac75324e9e1e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -273,6 +273,7 @@ enum { INET_FLAGS_MC6_ALL = 22, INET_FLAGS_AUTOFLOWLABEL_SET = 23, INET_FLAGS_AUTOFLOWLABEL = 24, + INET_FLAGS_DONTFRAG = 25, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4b6cbec059e2..5a1f2993680d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6) } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, - const struct ipv6_pinfo *np) + const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, - .tclass = np->tclass, - .dontfrag = np->dontfrag, + .tclass = inet6_sk(sk)->tclass, + .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 363c7d510554..98d7aa78adda 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2166,7 +2166,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901d1..8fb4a791881a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); @@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = type; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 47aa42f93ccd..8851fe5d45a0 100644 --- a/net/ipv6/ip6_output.c +++ 
b/net/ipv6/ip6_output.c @@ -2092,7 +2092,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d5d428a695f7..33dd4dd872e6 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -478,6 +478,9 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); inet6_set_bit(AUTOFLOWLABEL_SET, sk); return 0; + case IPV6_DONTFRAG: + inet6_assign_bit(DONTFRAG, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -970,10 +973,6 @@ done: goto e_inval; retv = __ip6_sock_set_addr_preferences(sk, val); break; - case IPV6_DONTFRAG: - np->dontfrag = valbool; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1442,7 +1441,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_DONTFRAG: - val = np->dontfrag; + val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5831aaa53d75..4444b61eb23b 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 42fcec3ecf5e..cc9673c1809f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f60ba4295435..e4301500741a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1595,7 +1595,7 @@ back_from_confirm: do_append_data: if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, (struct rt6_info *)dst, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ed8ebb6f5909..40af2431e73a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -621,7 +621,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; -- cgit v1.2.3 From 3fa29971c69519629370b119b0b618ee88ade6b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:08 +0000 Subject: ipv6: lockless IPV6_RECVERR implemetation np->recverr is moved to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + include/net/ipv6.h | 4 +--- net/dccp/ipv6.c | 2 +- net/ipv4/ping.c | 2 +- net/ipv6/datagram.c | 6 ++---- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 10 +++++----- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 6 +++--- net/sctp/ipv6.c | 4 +--- 11 files changed, 25 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 57d563f1d4b1..53f4f1b97a78 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, + __u16 sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ac75324e9e1e..3b79bc759ff4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_AUTOFLOWLABEL_SET = 23, INET_FLAGS_AUTOFLOWLABEL = 24, INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5a1f2993680d..bd115980809f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1303,9 +1303,7 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 83617a16b98e..e6c3d84c2b9e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -185,7 +185,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 75e0aee35eb7..bc01ad5fc01a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * 4.1.3.3. 
*/ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_sk(sk)->recverr)) { + (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e81892814935..74673a5eff31 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -305,11 +305,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -344,12 +343,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 33dd4dd872e6..ec10b45c49c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -481,6 +481,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DONTFRAG: inet6_assign_bit(DONTFRAG, sk, valbool); return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -943,14 +950,6 @@ done: np->pmtudisc = val; retv = 0; break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1380,7 +1379,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc9673c1809f..71f6bdccfa1f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,7 +301,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. 
*/ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); @@ -312,14 +313,14 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 54db5fab318b..b5954b136b57 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4301500741a..90e873689b88 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -619,7 +619,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -1283,7 +1283,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1608,7 +1608,7 @@ do_append_data: up->pending = 0; if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 43f2731bf590..42b5b853ea01 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; - struct ipv6_pinfo *np; int err = 0; switch (type) { @@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, break; } - np = inet6_sk(sk); icmpv6_err_convert(type, code, &err); - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { -- cgit v1.2.3 From 3cccda8db2cf2f2a224d55d5b6e2251d478c58ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:09 +0000 Subject: ipv6: move np->repflow to atomic flags Move np->repflow to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/dccp/ipv6.c | 2 +- net/ipv6/af_inet6.c | 3 ++- net/ipv6/ip6_flowlabel.c | 8 ++++---- net/ipv6/tcp_ipv6.c | 14 ++++++-------- 6 files changed, 14 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 53f4f1b97a78..e62413371ea4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -244,7 +244,6 @@ struct ipv6_pinfo { /* sockopt flags */ __u16 sndflow:1, - repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 3b79bc759ff4..5d61c7dc6577 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -275,6 +275,7 @@ enum { INET_FLAGS_AUTOFLOWLABEL = 24, INET_FLAGS_DONTFRAG = 25, INET_FLAGS_RECVERR6 = 26, + INET_FLAGS_REPFLOW = 27, }; /* cmsg flags for inet */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e6c3d84c2b9e..d7e63eea705d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -679,7 +679,7 @@ ipv6_pktoptions: WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 372fb7b9112c..48737363377f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -220,7 +220,8 @@ lookup_protocol: inet6_set_bit(MC6_LOOP, sk); inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; - np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; + inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & + FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b3ca4beb4405..eca07e10e21f 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - if (np->repflow) { + if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } @@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - if (!np->repflow) + if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; - np->repflow = 0; + inet6_clear_bit(REPFLOW, sk); return 0; } @@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - np->repflow = 1; + inet6_set_bit(REPFLOW, sk); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5954b136b57..201caf88bb99 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -548,7 +548,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && ireq->pktopts) + if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
@@ -797,7 +797,7 @@ static void tcp_v6_init_req(struct request_sock *req, (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || - np->rxopt.bits.rxohlim || np->repflow)) { + np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -1055,10 +1055,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - const struct ipv6_pinfo *np = tcp_inet6_sk(sk); - trace_tcp_send_reset(sk, skb); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = sk->sk_priority; txhash = sk->sk_txhash; @@ -1247,7 +1245,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* @@ -1320,7 +1318,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. @@ -1546,7 +1544,7 @@ ipv6_pktoptions: ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); -- cgit v1.2.3 From 83cd5eb654b320c1972254f243531f3f3cebcccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:10 +0000 Subject: ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation Reads from np->rtalert_isolate are racy. Move this flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + net/ipv6/ip6_output.c | 3 +-- net/ipv6/ipv6_sockglue.c | 13 ++++++------- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e62413371ea4..f288a35f157f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -246,11 +246,10 @@ struct ipv6_pinfo { __u16 sndflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 5d61c7dc6577..befee0f66c05 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -276,6 +276,7 @@ enum { INET_FLAGS_DONTFRAG = 25, INET_FLAGS_RECVERR6 = 26, INET_FLAGS_REPFLOW = 27, + INET_FLAGS_RTALERT_ISOLATE = 28, }; /* cmsg flags for inet */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8851fe5d45a0..f87d8491d7e2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -368,9 +368,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ec10b45c49c1..c22a492e0536 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -488,6 +488,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (!val) skb_errqueue_purge(&sk->sk_error_queue); return 0; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -936,12 +941,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_ROUTER_ALERT_ISOLATE: - if (optlen < sizeof(int)) - goto e_inval; - np->rtalert_isolate = valbool; - retv = 0; - break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; @@ -1452,7 +1451,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_ROUTER_ALERT_ISOLATE: - val = np->rtalert_isolate; + val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: -- cgit v1.2.3 From 6b724bc4300b431443f3b99520994a5aece347cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:11 +0000 Subject: ipv6: lockless IPV6_MTU_DISCOVER implementation Most np->pmtudisc reads are racy. Move this 3bit field on a full byte, add annotations and make IPV6_MTU_DISCOVER setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 5 ++--- include/net/ip6_route.h | 14 +++++++++----- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 7 files changed, 24 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f288a35f157f..10f521a6a9c8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,13 +243,12 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 sndflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ + __u8 sndflow:1, srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..b1ea49900b4a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f87d8491d7e2..7e5d9eeb990f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? 
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); frag_size = READ_ONCE(np->frag_size); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c22a492e0536..85ea42644dcb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -941,14 +948,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 71f6bdccfa1f..47372cceb98f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90e873689b88..c17e19fece1b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index df1b33b61059..5820a8156c47 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ - np->pmtudisc = val; + WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); -- cgit v1.2.3 From 859f8b265fc2a11af0fb0c52b4087e0409250592 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:12 +0000 Subject: ipv6: lockless IPV6_FLOWINFO_SEND implementation np->sndflow reads are racy. Use one bit ftom atomic inet->inet_flags instead, IPV6_FLOWINFO_SEND setsockopt() can be lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + net/dccp/ipv6.c | 2 +- net/ipv4/ping.c | 3 +-- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 7 ++++--- net/ipv6/ipv6_sockglue.c | 13 ++++++------- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 4 ++-- net/sctp/ipv6.c | 3 ++- 13 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 10f521a6a9c8..09253825c99c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 sndflow:1, - srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index befee0f66c05..98e11958cdff 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -277,6 +277,7 @@ enum { INET_FLAGS_RECVERR6 = 26, INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, + INET_FLAGS_SNDFLOW = 29, }; /* cmsg flags for inet */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d7e63eea705d..4803f0614848 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -844,7 +844,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bc01ad5fc01a..4dd809b7b188 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; sin6->sin6_flowinfo = 0; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 48737363377f..c6ad0d6e99b5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -537,7 +537,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, CGROUP_INET6_GETPEERNAME); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 74673a5eff31..cc6a502db39d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) struct flowi6 fl6; int err = 0; - if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); if (IS_ERR(flowlabel)) return -EINVAL; @@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6_flowlabel = 
usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (ipv6_addr_any(&usin->sin6_addr)) { @@ -491,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = ip6_flowinfo(ip6h); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 85ea42644dcb..e9dc6f881bb9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -500,6 +500,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(np->pmtudisc, val); return 0; + case IPV6_FLOWINFO_SEND: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(SNDFLOW, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -948,12 +953,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_FLOWINFO_SEND: - if (optlen < sizeof(int)) - goto e_inval; - np->sndflow = valbool; - retv = 0; - break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; @@ -1381,7 +1380,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_FLOWINFO_SEND: - val = np->sndflow; + val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 4444b61eb23b..e8fb0d275cc2 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 47372cceb98f..a2aa54a2baae 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 201caf88bb99..94afb8d0f2d0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -163,7 +163,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c17e19fece1b..5e9312eefed0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1429,7 +1429,7 @@ do_udp_sendmsg: fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 40af2431e73a..44cfb72bbd18 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; lsa->l2tp_addr = 
sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) lsa->l2tp_flowinfo = np->flow_label; } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -529,7 +529,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; daddr = &lsa->l2tp_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 42b5b853ea01..5c0ed5909d85 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -296,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); - if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); -- cgit v1.2.3 From 4fa5ce3e3a10da4ecc438a548fc701dec5f28758 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Sep 2023 12:36:29 +0300 Subject: tcp: indent an if statement Indent this if statement one tab. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 984ab4a0421e..d7d64682b068 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -394,7 +394,7 @@ static void tcp_probe_timer(struct sock *sk) if (user_timeout && (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= msecs_to_jiffies(user_timeout)) - goto abort; + goto abort; } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { -- cgit v1.2.3 From d609f3d228a8efe991f44f11f24146e2a5209755 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Thu, 7 Sep 2023 09:20:32 +0530 Subject: xsk: add multi-buffer support for sockets sharing umem Userspace applications indicate their multi-buffer capability to xsk using the XSK_USE_SG socket bind flag. For sockets using shared umem the bind flag may contain XSK_USE_SG only for the first socket. For any subsequent socket the only option supported is XDP_SHARED_UMEM. Add option XDP_UMEM_SG_FLAG in umem config flags to store the multi-buffer handling capability when indicated by the XSK_USE_SG option in the bind flag of the first socket. Use this to derive multi-buffer capability for subsequent sockets in xsk core.
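As an illustration of the bind sequence this change is about, here is a minimal userspace sketch (error handling trimmed; the helper name, ifindex and queue id are placeholders): the socket that registered the umem binds with XDP_USE_SG (what libxdp calls XSK_USE_SG), while every further socket sharing that umem binds with XDP_SHARED_UMEM only and, after this patch, inherits the multi-buffer capability from the umem.

#include <string.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Hypothetical helper: both AF_XDP sockets already exist and the umem
 * has been registered on first_fd via the XDP_UMEM_REG setsockopt. */
static int bind_sg_and_shared(int first_fd, int other_fd, int ifindex, __u32 queue_id)
{
	struct sockaddr_xdp sxdp;

	memset(&sxdp, 0, sizeof(sxdp));
	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue_id;
	sxdp.sxdp_flags = XDP_USE_SG;		/* first socket opts in to multi-buffer */
	if (bind(first_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
		return -1;

	memset(&sxdp, 0, sizeof(sxdp));
	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue_id;
	sxdp.sxdp_flags = XDP_SHARED_UMEM;	/* no XDP_USE_SG on subsequent sockets */
	sxdp.sxdp_shared_umem_fd = first_fd;	/* share the first socket's umem */
	return bind(other_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}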
Signed-off-by: Tirthendu Sarkar Fixes: 81470b5c3c66 ("xsk: introduce XSK_USE_SG bind flag for xsk socket") Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20230907035032.2627879-1-tirthendu.sarkar@intel.com Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 2 ++ net/xdp/xsk.c | 2 +- net/xdp/xsk_buff_pool.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1617af380162..69b472604b86 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,6 +14,8 @@ #include #include +#define XDP_UMEM_SG_FLAG (1 << 1) + struct net_device; struct xsk_queue; struct xdp_buff; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 55f8b9b0e06d..7482d0aca504 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1228,7 +1228,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->dev = dev; xs->zc = xs->umem->zc; - xs->sg = !!(flags & XDP_USE_SG); + xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG); xs->queue_id = qid; xp_add_xsk(xs->pool, xs); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index b3f7b310811e..49cb9f9a09be 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -170,6 +170,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (err) return err; + if (flags & XDP_USE_SG) + pool->umem->flags |= XDP_UMEM_SG_FLAG; + if (flags & XDP_USE_NEED_WAKEUP) pool->uses_need_wakeup = true; /* Tx needs to be explicitly woken up the first time. Also -- cgit v1.2.3 From fc45c5b642dbcac3bb10f4f904e4b863233e5369 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 13 Sep 2023 10:13:48 -0700 Subject: bpf: make it easier to add new metadata kfunc No functional changes. Instead of having hand-crafted code in bpf_dev_bound_resolve_kfunc, move kfunc <> xmo handler relationship into XDP_METADATA_KFUNC_xxx. This way, any time new kfunc is added, we don't have to touch bpf_dev_bound_resolve_kfunc. Also document XDP_METADATA_KFUNC_xxx arguments since we now have more than two and it might be confusing what is what. 
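The trick being extended here is an X-macro: every row of XDP_METADATA_KFUNC_xxx lists all identifiers that belong to one kfunc, and each call site re-defines XDP_METADATA_KFUNC() to pick out only the columns it cares about, so adding a kfunc means adding one row instead of editing every dispatcher. A self-contained toy version of the pattern, with illustrative names that are not the kernel's:

#include <stdio.h>

/* One row per feature: enum name, human-readable string, handler. */
#define FEATURE_LIST(X)                                   \
	X(FEAT_TIMESTAMP, "timestamp", handle_timestamp) \
	X(FEAT_HASH,      "hash",      handle_hash)

static void handle_timestamp(void) { puts("timestamp handler"); }
static void handle_hash(void)      { puts("hash handler"); }

enum feature {
#define X(id, str, fn) id,
	FEATURE_LIST(X)
#undef X
	FEAT_MAX,
};

/* Dispatch table generated from the same list: a new row automatically
 * gets an enum value and a table slot, just like the xmo handlers. */
static void (*const handlers[FEAT_MAX])(void) = {
#define X(id, str, fn) [id] = fn,
	FEATURE_LIST(X)
#undef X
};

int main(void)
{
	handlers[FEAT_HASH]();	/* prints "hash handler" */
	return 0;
}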
Cc: netdev@vger.kernel.org Cc: Willem de Bruijn Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230913171350.369987-2-sdf@google.com Signed-off-by: Martin KaFai Lau --- include/net/xdp.h | 16 ++++++++++++---- kernel/bpf/offload.c | 9 +++++---- net/core/xdp.c | 4 ++-- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/xdp.h b/include/net/xdp.h index de08c8e0d134..d59e12f8f311 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -383,14 +383,22 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE +/* Define the relationship between xdp-rx-metadata kfunc and + * various other entities: + * - xdp_rx_metadata enum + * - kfunc name + * - xdp_metadata_ops field + */ #define XDP_METADATA_KFUNC_xxx \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ - bpf_xdp_metadata_rx_timestamp) \ + bpf_xdp_metadata_rx_timestamp, \ + xmo_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ - bpf_xdp_metadata_rx_hash) \ + bpf_xdp_metadata_rx_hash, \ + xmo_rx_hash) \ -enum { -#define XDP_METADATA_KFUNC(name, _) name, +enum xdp_rx_metadata { +#define XDP_METADATA_KFUNC(name, _, __) name, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC MAX_XDP_METADATA_KFUNC, diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 3e4f2ec1af06..6aa6de8d715d 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -845,10 +845,11 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) if (!ops) goto out; - if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP)) - p = ops->xmo_rx_timestamp; - else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH)) - p = ops->xmo_rx_hash; +#define XDP_METADATA_KFUNC(name, _, xmo) \ + if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo; + XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC + out: up_read(&bpf_devs_lock); diff --git a/net/core/xdp.c b/net/core/xdp.c index a70670fe9a2d..bab563b2f812 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, __diag_pop(); BTF_SET8_START(xdp_metadata_kfunc_ids) -#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) +#define XDP_METADATA_KFUNC(_, name, __) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC BTF_SET8_END(xdp_metadata_kfunc_ids) @@ -752,7 +752,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { }; BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted) -#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str) +#define XDP_METADATA_KFUNC(name, str, _) BTF_ID(func, str) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC -- cgit v1.2.3 From a9c2a608549bb1a2363d289d63907640afcf22af Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 13 Sep 2023 10:13:49 -0700 Subject: bpf: expose information about supported xdp metadata kfunc Add new xdp-rx-metadata-features member to netdev netlink which exports a bitmask of supported kfuncs. Most of the patch is autogenerated (headers), the only relevant part is netdev.yaml and the changes in netdev-genl.c to marshal into netlink. 
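To see where the exported bitmask comes from on the driver side: a netdev advertises a metadata kfunc simply by having the corresponding xdp_metadata_ops callback non-NULL, and netdev-genl now turns each populated callback into one feature bit per XDP_METADATA_KFUNC_xxx row. A rough driver-side sketch follows; "mydrv" is a placeholder and the callback prototypes are abbreviated from include/net/xdp.h of this series, so check the header for the exact signatures. The commit's own veth example below then shows how the result surfaces to userspace.

/* Sketch only: a driver exposing both RX metadata hints. */
static int mydrv_xmo_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	/* look up the RX descriptor behind ctx and fill *timestamp */
	return -ENODATA;
}

static int mydrv_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
			     enum xdp_rss_hash_type *rss_type)
{
	/* fill *hash and *rss_type from the RX descriptor */
	return -ENODATA;
}

static const struct xdp_metadata_ops mydrv_xdp_metadata_ops = {
	.xmo_rx_timestamp	= mydrv_xmo_rx_timestamp,
	.xmo_rx_hash		= mydrv_xmo_rx_hash,
};

/* During netdev setup, e.g. in probe:
 *	netdev->xdp_metadata_ops = &mydrv_xdp_metadata_ops;
 * With both callbacks set, the new attribute reports the
 * "timestamp" and "hash" bits for this device. */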
Example output on veth: $ ip link add veth0 type veth peer name veth1 # ifndex == 12 $ ./tools/net/ynl/samples/netdev 12 Select ifc ($ifindex; or 0 = dump; or -2 ntf check): 12 veth1[12] xdp-features (23): basic redirect rx-sg xdp-rx-metadata-features (3): timestamp hash xdp-zc-max-segs=0 Cc: netdev@vger.kernel.org Cc: Willem de Bruijn Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230913171350.369987-3-sdf@google.com Signed-off-by: Martin KaFai Lau --- Documentation/netlink/specs/netdev.yaml | 21 +++++++++++++++++++++ Documentation/networking/xdp-rx-metadata.rst | 7 +++++++ include/net/xdp.h | 5 ++++- include/uapi/linux/netdev.h | 16 ++++++++++++++++ kernel/bpf/offload.c | 2 +- net/core/netdev-genl.c | 12 +++++++++++- net/core/xdp.c | 4 ++-- tools/include/uapi/linux/netdev.h | 16 ++++++++++++++++ 8 files changed, 78 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 1c7284fd535b..c46fcc78fc04 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -42,6 +42,19 @@ definitions: doc: This feature informs if netdev implements non-linear XDP buffer support in ndo_xdp_xmit callback. + - + type: flags + name: xdp-rx-metadata + render-max: true + entries: + - + name: timestamp + doc: + Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp(). + - + name: hash + doc: + Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). attribute-sets: - @@ -68,6 +81,13 @@ attribute-sets: type: u32 checks: min: 1 + - + name: xdp-rx-metadata-features + doc: Bitmask of supported XDP receive metadata features. + See Documentation/networking/xdp-rx-metadata.rst for more details. + type: u64 + enum: xdp-rx-metadata + enum-as-flags: true operations: list: @@ -84,6 +104,7 @@ operations: - ifindex - xdp-features - xdp-zc-max-segs + - xdp-rx-metadata-features dump: reply: *dev-all - diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index 25ce72af81c2..205696780b78 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -105,6 +105,13 @@ bpf_tail_call Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY`` is currently not supported. +Supported Devices +================= + +It is possible to query which kfunc the particular netdev implements via +netlink. See ``xdp-rx-metadata-features`` attribute set in +``Documentation/netlink/specs/netdev.yaml``. 
+ Example ======= diff --git a/include/net/xdp.h b/include/net/xdp.h index d59e12f8f311..349c36fb5fd8 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -386,19 +386,22 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, /* Define the relationship between xdp-rx-metadata kfunc and * various other entities: * - xdp_rx_metadata enum + * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml) * - kfunc name * - xdp_metadata_ops field */ #define XDP_METADATA_KFUNC_xxx \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ + NETDEV_XDP_RX_METADATA_TIMESTAMP, \ bpf_xdp_metadata_rx_timestamp, \ xmo_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ + NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ enum xdp_rx_metadata { -#define XDP_METADATA_KFUNC(name, _, __) name, +#define XDP_METADATA_KFUNC(name, _, __, ___) name, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC MAX_XDP_METADATA_KFUNC, diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index c1634b95c223..2943a151d4f1 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -38,11 +38,27 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_MASK = 127, }; +/** + * enum netdev_xdp_rx_metadata + * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW + * timestamp via bpf_xdp_metadata_rx_timestamp(). + * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet + * hash via bpf_xdp_metadata_rx_hash(). + */ +enum netdev_xdp_rx_metadata { + NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, + NETDEV_XDP_RX_METADATA_HASH = 2, + + /* private: */ + NETDEV_XDP_RX_METADATA_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, + NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 6aa6de8d715d..e7a1752b5a09 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -845,7 +845,7 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) if (!ops) goto out; -#define XDP_METADATA_KFUNC(name, _, xmo) \ +#define XDP_METADATA_KFUNC(name, _, __, xmo) \ if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index c1aea8b756b6..fe61f85bcf33 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "netdev-genl-gen.h" @@ -12,15 +13,24 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xdp_rx_meta = 0; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; +#define XDP_METADATA_KFUNC(_, flag, __, xmo) \ + if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ + xdp_rx_meta |= flag; +XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, - netdev->xdp_features, NETDEV_A_DEV_PAD)) { + netdev->xdp_features, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + xdp_rx_meta, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/core/xdp.c b/net/core/xdp.c index bab563b2f812..df4789ab512d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct 
xdp_md *ctx, u32 *hash, __diag_pop(); BTF_SET8_START(xdp_metadata_kfunc_ids) -#define XDP_METADATA_KFUNC(_, name, __) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) +#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC BTF_SET8_END(xdp_metadata_kfunc_ids) @@ -752,7 +752,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { }; BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted) -#define XDP_METADATA_KFUNC(name, str, _) BTF_ID(func, str) +#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index c1634b95c223..2943a151d4f1 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -38,11 +38,27 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_MASK = 127, }; +/** + * enum netdev_xdp_rx_metadata + * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW + * timestamp via bpf_xdp_metadata_rx_timestamp(). + * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet + * hash via bpf_xdp_metadata_rx_hash(). + */ +enum netdev_xdp_rx_metadata { + NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, + NETDEV_XDP_RX_METADATA_HASH = 2, + + /* private: */ + NETDEV_XDP_RX_METADATA_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, + NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) -- cgit v1.2.3 From 41862d12e77f78b4ecb59b028bf44de92991bda2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Sep 2023 12:58:35 +0000 Subject: net: use indirect call helpers for sk->sk_prot->release_cb() When adding sk->sk_prot->release_cb() call from __sk_flush_backlog() Paolo suggested using indirect call helpers to take care of CONFIG_RETPOLINE=y case. It turns out Google had such mitigation for years in release_sock(), it is time to make this public :) Suggested-by: Paolo Abeni Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 213a62ac13f2..a5995750c5c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3003,7 +3003,9 @@ void __sk_flush_backlog(struct sock *sk) __release_sock(sk); if (sk->sk_prot->release_cb) - sk->sk_prot->release_cb(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); @@ -3523,7 +3525,8 @@ void release_sock(struct sock *sk) __release_sock(sk); if (sk->sk_prot->release_cb) - sk->sk_prot->release_cb(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) -- cgit v1.2.3 From c123e0d30bdb54a0f91ec348827eef76877165d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Sep 2023 13:48:41 +0000 Subject: net: add truesize debug checks in skb_{add|coalesce}_rx_frag() It can be time consuming to track driver bugs, that might be detected too late from this confusing warning in skb_try_coalesce() WARN_ON_ONCE(delta < len); Add sanity check in skb_add_rx_frag() and skb_coalesce_rx_frag() to better track bug origin for CONFIG_DEBUG_NET=y builds. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4eaf7ed0d1f4..2198979470ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -847,6 +847,8 @@ EXPORT_SYMBOL(__napi_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; @@ -859,6 +861,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; -- cgit v1.2.3 From e326578a21414738de45f77badd332fb00bd0f58 Mon Sep 17 00:00:00 2001 From: Aananth V Date: Thu, 14 Sep 2023 14:36:20 +0000 Subject: tcp: call tcp_try_undo_recovery when an RTOd TFO SYNACK is ACKed For passive TCP Fast Open sockets that had SYN/ACK timeout and did not send more data in SYN_RECV, upon receiving the final ACK in 3WHS, the congestion state may awkwardly stay in CA_Loss mode unless the CA state was undone due to TCP timestamp checks. However, if tcp_rcv_synrecv_state_fastopen() decides not to undo, then we should enter CA_Open, because at that point we have received an ACK covering the retransmitted SYNACKs. Currently, the icsk_ca_state is only set to CA_Open after we receive an ACK for a data-packet. This is because tcp_ack does not call tcp_fastretrans_alert (and tcp_process_loss) if !prior_packets Note that tcp_process_loss() calls tcp_try_undo_recovery(), so having tcp_rcv_synrecv_state_fastopen() decide that if we're in CA_Loss we should call tcp_try_undo_recovery() is consistent with that, and low risk. Fixes: dad8cea7add9 ("tcp: fix TFO SYNACK undo to avoid double-timestamp-undo") Signed-off-by: Aananth V Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 41b471748437..8d2c91703158 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6444,22 +6444,23 @@ reset_and_undo: static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req; /* If we are still handling the SYNACK RTO, see if timestamp ECR allows * undo. If peer SACKs triggered fast recovery, we can't undo here. */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - tcp_try_undo_loss(sk, false); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out) + tcp_try_undo_recovery(sk); /* Reset rtx states to prevent spurious retransmits_timed_out() */ - tcp_sk(sk)->retrans_stamp = 0; + tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. 
*/ - req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + req = rcu_dereference_protected(tp->fastopen_rsk, lockdep_sock_is_held(sk)); reqsk_fastopen_remove(sk, req, false); -- cgit v1.2.3 From 3868ab0f192581eff978501a05f3dc2e01541d77 Mon Sep 17 00:00:00 2001 From: Aananth V Date: Thu, 14 Sep 2023 14:36:21 +0000 Subject: tcp: new TCP_INFO stats for RTO events The 2023 SIGCOMM paper "Improving Network Availability with Protective ReRoute" has indicated Linux TCP's RTO-triggered txhash rehashing can effectively reduce application disruption during outages. To better measure the efficacy of this feature, this patch adds three more detailed stats during RTO recovery and exports via TCP_INFO. Applications and monitoring systems can leverage this data to measure the network path diversity and end-to-end repair latency during network outages to improve their network infrastructure. The following counters are added to tcp_sock in order to track RTO events over the lifetime of a TCP socket. 1. u16 total_rto - Counts the total number of RTO timeouts. 2. u16 total_rto_recoveries - Counts the total number of RTO recoveries. 3. u32 total_rto_time - Counts the total time spent (ms) in RTO recoveries. (time spent in CA_Loss and CA_Recovery states) To compute total_rto_time, we add a new u32 rto_stamp field to tcp_sock. rto_stamp records the start timestamp (ms) of the last RTO recovery (CA_Loss). Corresponding fields are also added to the tcp_info struct. Signed-off-by: Aananth V Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ include/uapi/linux/tcp.h | 12 ++++++++++++ net/ipv4/tcp.c | 9 +++++++++ net/ipv4/tcp_input.c | 15 +++++++++++++++ net/ipv4/tcp_minisocks.c | 4 ++++ net/ipv4/tcp_timer.c | 17 +++++++++++++++-- 6 files changed, 63 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 44d946161d4a..e15452df9804 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -377,6 +377,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 879eeb0a084b..d1d08da6331a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -289,6 +289,18 @@ struct tcp_info { */ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */ + + __u16 tcpi_total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + __u16 tcpi_total_rto_recoveries; /* Total number of RTO + * recoveries, including any + * unfinished recovery. + */ + __u32 tcpi_total_rto_time; /* Total time spent in RTO recoveries + * in milliseconds, including any + * unfinished recovery. 
+ */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0c3040a63ebd..69b8d7073708 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3818,6 +3818,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wnd = tp->rcv_wnd; info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; + + info->tcpi_total_rto = tp->total_rto; + info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; + info->tcpi_total_rto_time = tp->total_rto_time; + if (tp->rto_stamp) { + info->tcpi_total_rto_time += tcp_time_stamp_raw() - + tp->rto_stamp; + } + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8d2c91703158..584825ddd0a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2088,6 +2088,10 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->undo_marker = 0; tp->undo_retrans = -1; tp->sacked_out = 0; + tp->rto_stamp = 0; + tp->total_rto = 0; + tp->total_rto_recoveries = 0; + tp->total_rto_time = 0; } static inline void tcp_init_undo(struct tcp_sock *tp) @@ -2825,6 +2829,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +static void tcp_update_rto_time(struct tcp_sock *tp) +{ + if (tp->rto_stamp) { + tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp; + tp->rto_stamp = 0; + } +} + /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ @@ -3029,6 +3041,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, break; case TCP_CA_Loss: tcp_process_loss(sk, flag, num_dupack, rexmit); + if (icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_rto_time(tp); tcp_identify_packet_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) @@ -6454,6 +6468,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) tcp_try_undo_recovery(sk); /* Reset rtx states to prevent spurious retransmits_timed_out() */ + tcp_update_rto_time(tp); tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b98d476f1594..eee8ab1bfa0e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->undo_marker = treq->snt_isn; newtp->retrans_stamp = div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ); + newtp->total_rto = req->num_timeout; + newtp->total_rto_recoveries = 1; + newtp->total_rto_time = tcp_time_stamp_raw() - + newtp->retrans_stamp; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d7d64682b068..3f61c6a70a1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -415,6 +415,19 @@ abort: tcp_write_err(sk); } } +static void tcp_update_rto_stats(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!icsk->icsk_retransmits) { + tp->total_rto_recoveries++; + tp->rto_stamp = tcp_time_stamp(tp); + } + icsk->icsk_retransmits++; + tp->total_rto++; +} + /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. 
@@ -447,7 +460,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -575,7 +588,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. -- cgit v1.2.3 From 5f18426928800c59fb0f9bc8fb0c182bb6f5ee24 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 21:49:39 +0100 Subject: netdev: expose DPLL pin handle for netdevice In case netdevice represents a SyncE port, the user needs to understand the connection between netdevice and associated DPLL pin. There might be multiple netdevices pointing to the same pin, in case of VF/SF implementation. Add an IFLA Netlink attribute to nest the DPLL pin handle, similar to how it is implemented for devlink port. Add a struct dpll_pin pointer to netdev and protect access to it by RTNL. Expose netdev_dpll_pin_set() and netdev_dpll_pin_clear() helpers to the drivers so they can set/clear the DPLL pin relationship to netdev. Note that during the lifetime of struct dpll_pin the pin handle does not change. Therefore it is safe to access it locklessly. It is the driver's responsibility to call netdev_dpll_pin_clear() before dpll_pin_put(). Signed-off-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- drivers/dpll/dpll_netlink.c | 16 ++++++++++++++-- include/linux/dpll.h | 15 +++++++++++++++ include/linux/netdevice.h | 21 +++++++++++++++++++++ include/uapi/linux/if_link.h | 2 +- net/core/dev.c | 22 ++++++++++++++++++++++ net/core/rtnetlink.c | 36 ++++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 9464a6865977..764437a0661b 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -47,6 +47,18 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) return 0; } +/** + * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin + * @pin: pin pointer + * + * Return: byte size of pin handle attribute for given pin. + */ +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return pin ?
nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} +EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); + /** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle @@ -56,8 +68,7 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -static int -dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -65,6 +76,7 @@ dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } +EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 2202310c10cd..bbc480cd2932 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -101,6 +101,21 @@ struct dpll_pin_properties { struct dpll_pin_frequency *freq_supported; }; +#if IS_ENABLED(CONFIG_DPLL) +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +#else +static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return 0; +} + +static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + return 0; +} +#endif + struct dpll_device * dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0896aaa91dd7..db3d8429d50d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,6 +79,8 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +/* DPLL specific */ +struct dpll_pin; typedef u32 xdp_features_t; @@ -2049,6 +2051,9 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
*/ @@ -2405,6 +2410,10 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin *dpll_pin; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3940,6 +3949,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void netdev_dpll_pin_clear(struct net_device *dev); + +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_DPLL) + return dev->dpll_pin; +#else + return NULL; +#endif +} + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ce3117df9cec..fac351a93aed 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -376,7 +376,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, - + IFLA_DPLL_PIN, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index ccff2b6ef958..cc03a5758d2d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9023,6 +9023,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); +static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ +#if IS_ENABLED(CONFIG_DPLL) + rtnl_lock(); + dev->dpll_pin = dpll_pin; + rtnl_unlock(); +#endif +} + +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + netdev_dpll_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(netdev_dpll_pin_set); + +void netdev_dpll_pin_clear(struct net_device *dev) +{ + netdev_dpll_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(netdev_dpll_pin_clear); + /** * dev_change_proto_down - set carrier according to proto_down. 
* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4a2ec33bfb51..7452a6d190c5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -57,6 +57,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include #include "dev.h" @@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev) return size; } +static size_t rtnl_dpll_pin_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ + + size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + + rtnl_dpll_pin_size(dev) + 0; } @@ -1774,6 +1785,28 @@ nest_cancel: return ret; } +static int rtnl_fill_dpll_pin(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *dpll_pin_nest; + int ret; + + dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); + if (!dpll_pin_nest) + return -EMSGSIZE; + + ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + if (ret < 0) + goto nest_cancel; + + nla_nest_end(skb, dpll_pin_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, dpll_pin_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_fill_devlink_port(skb, dev)) goto nla_put_failure; + if (rtnl_fill_dpll_pin(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3 From d0b7e990f760ec9a614fbe5f89a5cede4335a7bb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:32 +0200 Subject: devlink: move linecard struct into linecard.c Instead of exposing linecard struct, expose a simple helper to get the linecard index, which is all is needed outside linecard.c. Move the linecard struct to linecard.c and keep it private similar to the rest of the devlink objects. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/devlink/devl_internal.h | 14 +------------- net/devlink/linecard.c | 19 +++++++++++++++++++ net/devlink/port.c | 4 ++-- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index f6b5fea2e13c..1b05c2c09e27 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -206,19 +206,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); /* Linecards */ -struct devlink_linecard { - struct list_head list; - struct devlink *devlink; - unsigned int index; - const struct devlink_linecard_ops *ops; - void *priv; - enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state */ - const char *type; - struct devlink_linecard_type *types; - unsigned int types_count; - struct devlink *nested_devlink; -}; +unsigned int devlink_linecard_index(struct devlink_linecard *linecard); /* Devlink nl cmds */ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info); diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 85c32c314b0f..a0210ba56f2d 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -6,6 +6,25 @@ #include "devl_internal.h" +struct devlink_linecard { + struct list_head list; + struct devlink *devlink; + unsigned int index; + const struct devlink_linecard_ops *ops; + void *priv; + enum devlink_linecard_state state; + struct mutex state_lock; /* Protects state */ + const char *type; + struct devlink_linecard_type *types; + unsigned int types_count; + struct devlink *nested_devlink; +}; + +unsigned int devlink_linecard_index(struct devlink_linecard *linecard) +{ + return linecard->index; +} + static struct devlink_linecard * devlink_linecard_get_by_index(struct devlink *devlink, unsigned int linecard_index) diff --git a/net/devlink/port.c b/net/devlink/port.c index 4763b42885fb..7b300a322ed9 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -483,7 +483,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, goto nla_put_failure; if (devlink_port->linecard && nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, - devlink_port->linecard->index)) + devlink_linecard_index(devlink_port->linecard))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -1420,7 +1420,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, case DEVLINK_PORT_FLAVOUR_PHYSICAL: if (devlink_port->linecard) n = snprintf(name, len, "l%u", - devlink_port->linecard->index); + devlink_linecard_index(devlink_port->linecard)); if (n < len) n += snprintf(name + n, len - n, "p%u", attrs->phys.port_number); -- cgit v1.2.3 From ad99637ac92dc18b979e6fa26eb440f38c0c6b55 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:35 +0200 Subject: devlink: put netnsid to nested handle If netns of devlink instance and nested devlink instance differs, put netnsid attr to indicate that. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/devlink/linecard.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index a0210ba56f2d..f95abdc93c66 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -65,7 +65,8 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_linecard_get_from_attrs(devlink, info->attrs); } -static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) +static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink) { struct nlattr *nested_attr; @@ -74,6 +75,13 @@ static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *dev return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; + if (!net_eq(net, devlink_net(devlink))) { + int id = peernet2id_alloc(net, devlink_net(devlink), + GFP_KERNEL); + + if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) + return -EMSGSIZE; + } nla_nest_end(msg, nested_attr); return 0; @@ -131,7 +139,8 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, } if (linecard->nested_devlink && - devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + devlink_nl_put_nested_handle(msg, devlink_net(devlink), + linecard->nested_devlink)) goto nla_put_failure; genlmsg_end(msg, hdr); -- cgit v1.2.3 From af1f1400af02e5a069d86ae7001b563c99395ea2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:36 +0200 Subject: devlink: move devlink_nl_put_nested_handle() into netlink.c As the next patch is going to call this helper out of the linecard.c, move to netlink.c. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/devlink/devl_internal.h | 2 ++ net/devlink/linecard.c | 26 -------------------------- net/devlink/netlink.c | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 1b05c2c09e27..fbf00de1accf 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -145,6 +145,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); /* Notify */ diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index f95abdc93c66..688e89daee6a 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -65,32 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_linecard_get_from_attrs(devlink, info->attrs); } -static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, - struct devlink *devlink) -{ - struct nlattr *nested_attr; - - nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); - if (!nested_attr) - return -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; - if (!net_eq(net, devlink_net(devlink))) { - int id = peernet2id_alloc(net, devlink_net(devlink), - GFP_KERNEL); - - if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) - return -EMSGSIZE; - } - - nla_nest_end(msg, nested_attr); - return 0; - -nla_put_failure: - nla_nest_cancel(msg, nested_attr); - return -EMSGSIZE; -} - struct devlink_linecard_type { const char *type; const void *priv; diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fc3e7c029a3b..48b5cfc2842f 100644 
--- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -82,6 +82,32 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, }; +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink) +{ + struct nlattr *nested_attr; + + nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (!net_eq(net, devlink_net(devlink))) { + int id = peernet2id_alloc(net, devlink_net(devlink), + GFP_KERNEL); + + if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) + return -EMSGSIZE; + } + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} + int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) { int err; -- cgit v1.2.3 From 1c2197c47a93d0ea36e73e437271c7cbcc0e1ceb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:37 +0200 Subject: devlink: extend devlink_nl_put_nested_handle() with attrtype arg As the next patch is going to call this helper with need to fill another type of nested attribute, pass it over function arg. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/devlink/devl_internal.h | 2 +- net/devlink/linecard.c | 3 ++- net/devlink/netlink.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index fbf00de1accf..53449dbd6545 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -146,7 +146,7 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) } int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, - struct devlink *devlink); + struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); /* Notify */ diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 688e89daee6a..36170f466878 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -114,7 +114,8 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, if (linecard->nested_devlink && devlink_nl_put_nested_handle(msg, devlink_net(devlink), - linecard->nested_devlink)) + linecard->nested_devlink, + DEVLINK_ATTR_NESTED_DEVLINK)) goto nla_put_failure; genlmsg_end(msg, hdr); diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 48b5cfc2842f..499304d9de49 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -83,11 +83,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { }; int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, - struct devlink *devlink) + struct devlink *devlink, int attrtype) { struct nlattr *nested_attr; - nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); + nested_attr = nla_nest_start(msg, attrtype); if (!nested_attr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) -- cgit v1.2.3 From c137743bce02b18c1537d4681aa515f7b80bf0a8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:38 +0200 Subject: devlink: introduce object and nested devlink relationship infra It is a bit tricky to maintain relationship between devlink objects and nested devlink instances due to following aspects: 1) Locking. It is necessary to lock the devlink instance that contains the object first, only after that to lock the nested instance. 2) Lifetimes. 
Objects (e.g devlink port) may be removed before the nested devlink instance. 3) Notifications. If nested instance changes (e.g. gets registered/unregistered) the nested-in object needs to send appropriate notifications. Resolve this by introducing an xarray that holds 1:1 relationships between devlink object and related nested devlink instance. Use that xarray index to get the object/nested devlink instance on the other side. Provide necessary helpers: devlink_rel_nested_in_add/clear() to add and clear the relationship. devlink_rel_nested_in_notify() to call the nested-in object to send notifications during nested instance register/unregister/netns change. devlink_rel_devlink_handle_put() to be used by nested-in object fill function to fill the nested handle. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/devlink/core.c | 215 ++++++++++++++++++++++++++++++++++++++++++++ net/devlink/dev.c | 1 + net/devlink/devl_internal.h | 17 ++++ 3 files changed, 233 insertions(+) (limited to 'net') diff --git a/net/devlink/core.c b/net/devlink/core.c index 6cec4afb01fb..2a98ff9a2f6b 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -16,6 +16,219 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +static struct devlink *devlinks_xa_get(unsigned long index) +{ + struct devlink *devlink; + + rcu_read_lock(); + devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED); + if (!devlink || !devlink_try_get(devlink)) + devlink = NULL; + rcu_read_unlock(); + return devlink; +} + +/* devlink_rels xarray contains 1:1 relationships between + * devlink object and related nested devlink instance. + * The xarray index is used to get the nested object from + * the nested-in object code. + */ +static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1); + +#define DEVLINK_REL_IN_USE XA_MARK_0 + +struct devlink_rel { + u32 index; + refcount_t refcount; + u32 devlink_index; + struct { + u32 devlink_index; + u32 obj_index; + devlink_rel_notify_cb_t *notify_cb; + devlink_rel_cleanup_cb_t *cleanup_cb; + struct work_struct notify_work; + } nested_in; +}; + +static void devlink_rel_free(struct devlink_rel *rel) +{ + xa_erase(&devlink_rels, rel->index); + kfree(rel); +} + +static void __devlink_rel_get(struct devlink_rel *rel) +{ + refcount_inc(&rel->refcount); +} + +static void __devlink_rel_put(struct devlink_rel *rel) +{ + if (refcount_dec_and_test(&rel->refcount)) + devlink_rel_free(rel); +} + +static void devlink_rel_nested_in_notify_work(struct work_struct *work) +{ + struct devlink_rel *rel = container_of(work, struct devlink_rel, + nested_in.notify_work); + struct devlink *devlink; + + devlink = devlinks_xa_get(rel->nested_in.devlink_index); + if (!devlink) + goto rel_put; + if (!devl_trylock(devlink)) { + devlink_put(devlink); + goto reschedule_work; + } + if (!devl_is_registered(devlink)) { + devl_unlock(devlink); + devlink_put(devlink); + goto rel_put; + } + if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE)) + rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index); + rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index); + devl_unlock(devlink); + devlink_put(devlink); + +rel_put: + __devlink_rel_put(rel); + return; + +reschedule_work: + schedule_work(&rel->nested_in.notify_work); +} + +static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel) +{ + __devlink_rel_get(rel); + schedule_work(&rel->nested_in.notify_work); +} + +static struct devlink_rel *devlink_rel_alloc(void) +{ + 
struct devlink_rel *rel; + static u32 next; + int err; + + rel = kzalloc(sizeof(*rel), GFP_KERNEL); + if (!rel) + return ERR_PTR(-ENOMEM); + + err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel, + xa_limit_32b, &next, GFP_KERNEL); + if (err) { + kfree(rel); + return ERR_PTR(err); + } + + refcount_set(&rel->refcount, 1); + INIT_WORK(&rel->nested_in.notify_work, + &devlink_rel_nested_in_notify_work); + return rel; +} + +static void devlink_rel_put(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink_rel_nested_in_notify_work_schedule(rel); + __devlink_rel_put(rel); + devlink->rel = NULL; +} + +void devlink_rel_nested_in_clear(u32 rel_index) +{ + xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE); +} + +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink) +{ + struct devlink_rel *rel = devlink_rel_alloc(); + + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + if (IS_ERR(rel)) + return PTR_ERR(rel); + + rel->devlink_index = devlink->index; + rel->nested_in.devlink_index = devlink_index; + rel->nested_in.obj_index = obj_index; + rel->nested_in.notify_cb = notify_cb; + rel->nested_in.cleanup_cb = cleanup_cb; + *rel_index = rel->index; + xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink->rel = rel; + return 0; +} + +void devlink_rel_nested_in_notify(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + devlink_rel_nested_in_notify_work_schedule(rel); +} + +static struct devlink_rel *devlink_rel_find(unsigned long rel_index) +{ + return xa_find(&devlink_rels, &rel_index, rel_index, + DEVLINK_REL_IN_USE); +} + +static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index) +{ + struct devlink *devlink; + struct devlink_rel *rel; + u32 devlink_index; + + if (!rel_index) + return NULL; + xa_lock(&devlink_rels); + rel = devlink_rel_find(rel_index); + if (rel) + devlink_index = rel->devlink_index; + xa_unlock(&devlink_rels); + if (!rel) + return NULL; + devlink = devlinks_xa_get(devlink_index); + if (!devlink) + return NULL; + devl_lock(devlink); + if (!devl_is_registered(devlink)) { + devl_unlock(devlink); + devlink_put(devlink); + return NULL; + } + return devlink; +} + +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated) +{ + struct net *net = devlink_net(devlink); + struct devlink *rel_devlink; + int err; + + rel_devlink = devlink_rel_devlink_get_lock(rel_index); + if (!rel_devlink) + return 0; + err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype); + devl_unlock(rel_devlink); + devlink_put(rel_devlink); + if (!err && msg_updated) + *msg_updated = true; + return err; +} + void *devlink_priv(struct devlink *devlink) { return &devlink->priv; @@ -142,6 +355,7 @@ int devl_register(struct devlink *devlink) xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); return 0; } @@ -166,6 +380,7 @@ void devl_unregister(struct devlink *devlink) devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + devlink_rel_put(devlink); } EXPORT_SYMBOL_GPL(devl_unregister); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index bba4ace7d22b..3ae26d9088ab 100644 --- a/net/devlink/dev.c +++ 
b/net/devlink/dev.c @@ -372,6 +372,7 @@ static void devlink_reload_netns_change(struct devlink *devlink, devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); } int devlink_reload(struct devlink *devlink, struct net *dest_net, diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 53449dbd6545..4cb534aff44d 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -17,6 +17,8 @@ #include "netlink_gen.h" +struct devlink_rel; + #define DEVLINK_REGISTERED XA_MARK_1 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ @@ -55,6 +57,7 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct rcu_work rwork; + struct devlink_rel *rel; char priv[] __aligned(NETDEV_ALIGN); }; @@ -92,6 +95,20 @@ static inline bool devl_is_registered(struct devlink *devlink) return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } +typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); +typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, + u32 rel_index); + +void devlink_rel_nested_in_clear(u32 rel_index); +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink); +void devlink_rel_nested_in_notify(struct devlink *devlink); +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated); + /* Netlink */ #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) -- cgit v1.2.3 From 0b7a2721e36c11313f8b0f251a508d25a872cd28 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:39 +0200 Subject: devlink: expose peer SF devlink instance Introduce a new helper devl_port_fn_devlink_set() to be used by driver assigning a devlink instance to the peer devlink port function. Expose this to user over new netlink attribute nested under port function nest to expose devlink handle related to the port function. This is particularly helpful for user to understand the relationship between devlink instances created for SFs and the port functions they belong to. Note that caller of devlink_port_notify() needs to hold devlink instance lock, put the assertion to devl_port_fn_devlink_set() to make this requirement explicit. Also note the limitations that only allow to make this assignment for registered objects. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/devlink.h | 3 +++ include/uapi/linux/devlink.h | 1 + net/devlink/port.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 29fd1b4ee654..2655ab6101ec 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,6 +150,7 @@ struct devlink_port { struct devlink_rate *devlink_rate; struct devlink_linecard *linecard; + u32 rel_index; }; struct devlink_port_new_attrs { @@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink); struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 03875e078be8..cd4b82458d1b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -680,6 +680,7 @@ enum devlink_port_function_attr { DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ + DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 diff --git a/net/devlink/port.c b/net/devlink/port.c index 7b300a322ed9..4e9003242448 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (err) goto out; err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_rel_devlink_handle_put(msg, port->devlink, + port->rel_index, + DEVLINK_PORT_FN_ATTR_DEVLINK, + &msg_updated); + out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1392,6 +1399,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (!devlink_port) + return; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); +} + +static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index, + u32 rel_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (devlink_port && devlink_port->rel_index == rel_index) + devlink_port->rel_index = 0; +} + +/** + * devl_port_fn_devlink_set - Attach peer devlink + * instance to port function. 
+ * @devlink_port: devlink port + * @fn_devlink: devlink instance to attach + */ +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink) +{ + ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); + + if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF || + devlink_port->attrs.pci_sf.external)) + return -EINVAL; + + return devlink_rel_nested_in_add(&devlink_port->rel_index, + devlink_port->devlink->index, + devlink_port->index, + devlink_port_rel_notify_cb, + devlink_port_rel_cleanup_cb, + fn_devlink); +} +EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set); + /** * devlink_port_linecard_set - Link port with a linecard * -- cgit v1.2.3 From 9473bc0119e7e7630d7c1c7c3816c290a6f3ae19 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:41 +0200 Subject: devlink: convert linecard nested devlink to new rel infrastructure Benefit from the newly introduced rel infrastructure, treat the linecard nested devlink instances in the same way as port function instances. Convert the code to use the rel infrastructure. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/core_linecard_dev.c | 9 +++-- include/net/devlink.h | 4 +- net/devlink/linecard.c | 47 ++++++++++++++++------ 3 files changed, 42 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c index af37e650a8ad..e8d6fe35bf36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c @@ -132,6 +132,7 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, struct mlxsw_linecard *linecard = linecard_bdev->linecard; struct mlxsw_linecard_dev *linecard_dev; struct devlink *devlink; + int err; devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops, sizeof(*linecard_dev), &adev->dev); @@ -141,8 +142,12 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, linecard_dev->linecard = linecard_bdev->linecard; linecard_bdev->linecard_dev = linecard_dev; + err = devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); + if (err) { + devlink_free(devlink); + return err; + } devlink_register(devlink); - devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); return 0; } @@ -151,9 +156,7 @@ static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev) struct mlxsw_linecard_bdev *linecard_bdev = container_of(adev, struct mlxsw_linecard_bdev, adev); struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev); - struct mlxsw_linecard *linecard = linecard_bdev->linecard; - devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL); devlink_unregister(devlink); devlink_free(devlink); } diff --git a/include/net/devlink.h b/include/net/devlink.h index 2655ab6101ec..0dfcd7d7fa18 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1720,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink); +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int 
sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 36170f466878..9ff1813f88c5 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -17,7 +17,7 @@ struct devlink_linecard { const char *type; struct devlink_linecard_type *types; unsigned int types_count; - struct devlink *nested_devlink; + u32 rel_index; }; unsigned int devlink_linecard_index(struct devlink_linecard *linecard) @@ -112,10 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } - if (linecard->nested_devlink && - devlink_nl_put_nested_handle(msg, devlink_net(devlink), - linecard->nested_devlink, - DEVLINK_ATTR_NESTED_DEVLINK)) + if (devlink_rel_devlink_handle_put(msg, devlink, + linecard->rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -524,7 +524,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -543,7 +542,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -591,6 +589,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); +static void devlink_linecard_rel_notify_cb(struct devlink *devlink, + u32 linecard_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (!linecard) + return; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); +} + +static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink, + u32 linecard_index, u32 rel_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (linecard && linecard->rel_index == rel_index) + linecard->rel_index = 0; +} + /** * devlink_linecard_nested_dl_set - Attach/detach nested devlink * instance to linecard. 
@@ -598,12 +617,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); * @linecard: devlink linecard * @nested_devlink: devlink instance to attach or NULL to detach */ -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink) +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) { - mutex_lock(&linecard->state_lock); - linecard->nested_devlink = nested_devlink; - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); + return devlink_rel_nested_in_add(&linecard->rel_index, + linecard->devlink->index, + linecard->index, + devlink_linecard_rel_notify_cb, + devlink_linecard_rel_cleanup_cb, + nested_devlink); } EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); -- cgit v1.2.3 From c5e1bf8a51cfe5060e91c7533098e329c0118f6d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:42 +0200 Subject: devlink: introduce possibility to expose info about nested devlinks In mlx5, there is a devlink instance created for PCI device. Also, one separate devlink instance is created for auxiliary device that represents the netdev of uplink port. This relation is currently invisible to the devlink user. Benefit from the rel infrastructure and allow for nested devlink instance to set the relationship for the nested-in devlink instance. Note that there may be many nested instances, therefore use xarray to hold the list of rel_indexes for individual nested instances. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 2 ++ net/devlink/core.c | 2 ++ net/devlink/dev.c | 49 +++++++++++++++++++++++++++++++++++++++++++++ net/devlink/devl_internal.h | 1 + 4 files changed, 54 insertions(+) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 0dfcd7d7fa18..fad8e36e3d98 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1921,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink); bool devlink_is_reload_failed(const struct devlink *devlink); void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, diff --git a/net/devlink/core.c b/net/devlink/core.c index 2a98ff9a2f6b..bcbbb952569f 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -430,6 +430,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); + xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->linecard_list); @@ -476,6 +477,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!xa_empty(&devlink->ports)); + xa_destroy(&devlink->nested_rels); xa_destroy(&devlink->snapshot_ids); xa_destroy(&devlink->params); xa_destroy(&devlink->ports); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 3ae26d9088ab..dc8039ca2b38 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -138,6 +138,23 @@ nla_put_failure: return -EMSGSIZE; } +static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink) +{ + unsigned long rel_index; + void *unused; + int err; + + 
xa_for_each(&devlink->nested_rels, rel_index, unused) { + err = devlink_rel_devlink_handle_put(msg, devlink, + rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL); + if (err) + return err; + } + return 0; +} + static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) @@ -164,6 +181,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, goto dev_stats_nest_cancel; nla_nest_end(msg, dev_stats); + + if (devlink_nl_nested_fill(msg, devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -230,6 +251,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } +static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) +{ + devlink_notify(devlink, DEVLINK_CMD_NEW); +} + +static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, + u32 rel_index) +{ + xa_erase(&devlink->nested_rels, rel_index); +} + +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink) +{ + u32 rel_index; + int err; + + err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, + devlink_rel_notify_cb, + devlink_rel_cleanup_cb, + nested_devlink); + if (err) + return err; + return xa_insert(&devlink->nested_rels, rel_index, + xa_mk_value(0), GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(devl_nested_devlink_set); + void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 4cb534aff44d..741d1bf1bec8 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -58,6 +58,7 @@ struct devlink { refcount_t refcount; struct rcu_work rwork; struct devlink_rel *rel; + struct xarray nested_rels; char priv[] __aligned(NETDEV_ALIGN); }; -- cgit v1.2.3 From ddd7f45c899f7524bdbe6a32fe4906cde8b07b9b Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Thu, 14 Sep 2023 04:20:26 -0400 Subject: wifi: cfg80211: save power spectral density(psd) of regulatory rule 6 GHz regulatory domains introduces Power Spectral Density (PSD). The PSD value of the regulatory rule should be taken into effect for the ieee80211_channels falling into that particular regulatory rule. Save the values in the channel which has PSD value and add nl80211 attributes accordingly to handle it. Co-developed-by: Aditya Kumar Singh Signed-off-by: Aditya Kumar Singh Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/20230914082026.3709-1-quic_wgong@quicinc.com [use hole in chan flags, reword docs] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 +++++- include/net/regulatory.h | 1 + include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 9 +++++++++ net/wireless/reg.c | 17 +++++++++++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8fcfe1869424..9af714431b22 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -76,6 +76,8 @@ struct wiphy; * @IEEE80211_CHAN_DISABLED: This channel is disabled. * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes * sending probe requests or beaconing. + * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this + * channel. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. 
@@ -119,7 +121,7 @@ struct wiphy; enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, IEEE80211_CHAN_NO_IR = 1<<1, - /* hole at 1<<2 */ + IEEE80211_CHAN_PSD = 1<<2, IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, @@ -171,6 +173,7 @@ enum ieee80211_channel_flags { * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. + * @psd: power spectral density (in dBm) */ struct ieee80211_channel { enum nl80211_band band; @@ -187,6 +190,7 @@ struct ieee80211_channel { enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; + s8 psd; }; /** diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b2cb4a9eb04d..ebf9e028d1ef 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -213,6 +213,7 @@ struct ieee80211_reg_rule { u32 flags; u32 dfs_cac_ms; bool has_wmm; + s8 psd; }; struct ieee80211_regdomain { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f797ab7a6547..367e5fbc8930 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4215,6 +4215,8 @@ enum nl80211_wmm_rule { * as the primary or any of the secondary channels isn't possible * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel * in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_PSD: Power spectral density (in dBm) that + * is allowed on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4253,6 +4255,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_16MHZ, NL80211_FREQUENCY_ATTR_NO_320MHZ, NL80211_FREQUENCY_ATTR_NO_EHT, + NL80211_FREQUENCY_ATTR_PSD, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4353,6 +4356,8 @@ enum nl80211_reg_type { * a given frequency range. The value is in mBm (100 * dBm). * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. * If not present or 0 default CAC time will be used. + * @NL80211_ATTR_POWER_RULE_PSD: power spectral density (in dBm). + * This could be negative. 
* @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number * currently defined * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use @@ -4370,6 +4375,8 @@ enum nl80211_reg_rule_attr { NL80211_ATTR_DFS_CAC_TIME, + NL80211_ATTR_POWER_RULE_PSD, + /* keep last */ __NL80211_REG_RULE_ATTR_AFTER_LAST, NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 @@ -4453,6 +4460,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_HE: HE operation not allowed * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed * @NL80211_RRF_NO_EHT: EHT operation not allowed + * @NL80211_RRF_PSD: Ruleset has power spectral density value */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4473,6 +4481,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_HE = 1<<17, NL80211_RRF_NO_320MHZ = 1<<18, NL80211_RRF_NO_EHT = 1<<19, + NL80211_RRF_PSD = 1<<20, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 218093607b29..e64bf2a58b36 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1115,6 +1115,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_PSD) && + nla_put_s8(msg, NL80211_FREQUENCY_ATTR_PSD, chan->psd)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; @@ -8529,6 +8533,11 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, reg_rule->dfs_cac_ms)) goto nla_put_failure; + if ((reg_rule->flags & NL80211_RRF_PSD) && + nla_put_s8(msg, NL80211_ATTR_POWER_RULE_PSD, + reg_rule->psd)) + goto nla_put_failure; + nla_nest_end(msg, nl_reg_rule); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 33e2570f2bd6..eb2fa97457b4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1589,6 +1589,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_320MHZ; if (rd_flags & NL80211_RRF_NO_EHT) channel_flags |= IEEE80211_CHAN_NO_EHT; + if (rd_flags & NL80211_RRF_PSD) + channel_flags |= IEEE80211_CHAN_PSD; return channel_flags; } @@ -1795,6 +1797,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy, chan->dfs_cac_ms = reg_rule->dfs_cac_ms; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + return; } @@ -1815,6 +1820,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy, chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + if (chan->orig_mpwr) { /* * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER @@ -1884,6 +1892,12 @@ static void handle_channel_adjacent_rules(struct wiphy *wiphy, rrule2->dfs_cac_ms); } + if ((rrule1->flags & NL80211_RRF_PSD) && + (rrule2->flags & NL80211_RRF_PSD)) + chan->psd = min_t(s8, rrule1->psd, rrule2->psd); + else + chan->flags &= ~NL80211_RRF_PSD; + return; } @@ -2570,6 +2584,9 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + chan->max_power = chan->max_reg_power; } -- cgit v1.2.3 From a2713257ee2be22827d7bc248302d408c91bfb95 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Fri, 15 Sep 2023 13:12:38 -0600 Subject: tls: Use size_add() in call to struct_size() If, for any reason, the open-coded arithmetic causes a wraparound, the protection that `struct_size()` adds against potential integer overflows is defeated. Fix this by hardening call to `struct_size()` with `size_add()`. Fixes: b89fec54fd61 ("tls: rx: wrap decrypt params in a struct") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d1fc295b83b5..270712b8d391 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1487,7 +1487,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, */ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); aead_size = ALIGN(aead_size, __alignof__(*dctx)); - mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout), + mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)), sk->sk_allocation); if (!mem) { err = -ENOMEM; -- cgit v1.2.3 From 2506a91734754de690869824fb0d1ac592ec1266 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Sep 2023 13:16:26 -0600 Subject: tipc: Use size_add() in calls to struct_size() If, for any reason, the open-coded arithmetic causes a wraparound, the protection that `struct_size()` adds against potential integer overflows is defeated. Fix this by hardening call to `struct_size()` with `size_add()`. Fixes: e034c6d23bc4 ("tipc: Use struct_size() helper") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- net/tipc/link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index e33b4f29f77c..d0143823658d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1446,7 +1446,7 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ - if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) { + if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; @@ -1533,7 +1533,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ - len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt); + len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); ga->len = htons(len); return len; } -- cgit v1.2.3 From 1cb6422ecac8804ebe0b71f4b3440674955fec73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:15:10 -0700 Subject: ceph: Annotate struct ceph_monmap with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ceph_monmap. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Ilya Dryomov Cc: Xiubo Li Cc: Jeff Layton Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Xiubo Li Signed-off-by: David S. Miller --- include/linux/ceph/mon_client.h | 2 +- net/ceph/mon_client.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index faabad6603db..f263f7e91a21 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc) GFP_KERNEL); if (!monc->monmap) return -ENOMEM; + monc->monmap->num_mon = num_mon; for (i = 0; i < num_mon; i++) { struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i]; @@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) inst->name.type = CEPH_ENTITY_TYPE_MON; inst->name.num = cpu_to_le64(i); } - monc->monmap->num_mon = num_mon; return 0; } -- cgit v1.2.3 From fa17a6d8a5bd0cd7565b613cb804242cd0f6b7ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 14:23:21 +0000 Subject: ipv6: lockless IPV6_ADDR_PREFERENCES implementation We have data-races while reading np->srcprefs Switch the field to a plain byte, add READ_ONCE() and WRITE_ONCE() annotations where needed, and IPV6_ADDR_PREFERENCES setsockopt() can now be lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230918142321.1794107-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- include/net/ip6_route.h | 5 ++--- include/net/ipv6.h | 20 +++++++------------- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 19 ++++++++++--------- net/ipv6/route.c | 2 +- 6 files changed, 22 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 09253825c99c..e400ff757f13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,7 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b1ea49900b4a..28b065790261 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,13 +53,12 @@ struct route_info { */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { - /* No need to bitmask because srcprefs have only 3 bits. 
*/ - return srcprefs << 3; + return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { - return (flags >> 3) & 7; + return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd115980809f..b3444c8a6f74 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1306,10 +1306,13 @@ static inline void ip6_sock_set_recverr(struct sock *sk) inet6_set_bit(RECVERR6, sk); } -static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ + IPV6_PREFER_SRC_COA) + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { + unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; - unsigned int prefmask = ~0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | @@ -1359,20 +1362,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) return -EINVAL; } - inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + WRITE_ONCE(inet6_sk(sk)->srcprefs, + (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } -static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) -{ - int ret; - - lock_sock(sk); - ret = __ip6_sock_set_addr_preferences(sk, val); - release_sock(sk); - return ret; -} - static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e5d9eeb990f..951ba8089b5b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1113,7 +1113,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e9dc6f881bb9..7d661735cb9d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -505,6 +505,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(SNDFLOW, sk, valbool); return 0; + case IPV6_ADDR_PREFERENCES: + if (optlen < sizeof(int)) + return -EINVAL; + return ip6_sock_set_addr_preferences(sk, val); } if (needs_rtnl) rtnl_lock(); @@ -964,11 +968,6 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; - case IPV6_ADDR_PREFERENCES: - if (optlen < sizeof(int)) - goto e_inval; - retv = __ip6_sock_set_addr_preferences(sk, val); - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1415,23 +1414,25 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_ADDR_PREFERENCES: + { + u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; - if (np->srcprefs & IPV6_PREFER_SRC_TMP) + if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; - else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? 
*/ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } - if (np->srcprefs & IPV6_PREFER_SRC_COA) + if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; - + } case IPV6_MINHOPCOUNT: val = READ_ONCE(np->min_hopcount); break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9d8dfc7423e4..b132feae3393 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net, if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) - flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); + flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } -- cgit v1.2.3 From 6c0da8406382d39ec06ad54b0d4935bd7d63612c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2023 10:52:05 +0100 Subject: wifi: cfg80211: make read-only array centers_80mhz static const Don't populate the read-only array lanes on the stack, instead make it static const. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/mac80211/tdls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a4af3b7675ef..c3a60fd19c37 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { /* IEEE802.11ac-2013 Table E-4 */ - u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; + static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(&sta->deflink); -- cgit v1.2.3 From 0df7cd3c13e44d01f9f28e29cbce74e2931b00fe Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:15 +0300 Subject: vsock/virtio/vhost: read data from non-linear skb This is preparation patch for MSG_ZEROCOPY support. It adds handling of non-linear skbs by replacing direct calls of 'memcpy_to_msg()' with 'skb_copy_datagram_iter()'. Main advantage of the second one is that it can handle paged part of the skb by using 'kmap()' on each page, but if there are no pages in the skb, it behaves like simple copying to iov iterator. This patch also adds new field to the control block of skb - this value shows current offset in the skb to read next portion of data (it doesn't matter linear it or not). Idea behind this field is that 'skb_copy_datagram_iter()' handles both types of skb internally - it just needs an offset from which to copy data from the given skb. This offset is incremented on each read from skb. This approach allows to simplify handling of both linear and non-linear skbs, because for linear skb we need to call 'skb_pull()' after reading data from it, while in non-linear case we need to update 'data_len'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Signed-off-by: Paolo Abeni --- drivers/vhost/vsock.c | 14 +++++++++----- include/linux/virtio_vsock.h | 1 + net/vmw_vsock/virtio_transport_common.c | 32 +++++++++++++++++++------------- 3 files changed, 29 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 817d377a3f36..83711aad855c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -114,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct sk_buff *skb; unsigned out, in; size_t nbytes; + u32 offset; int head; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); @@ -156,7 +157,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, } iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len); - payload_len = skb->len; + offset = VIRTIO_VSOCK_SKB_CB(skb)->offset; + payload_len = skb->len - offset; hdr = virtio_vsock_hdr(skb); /* If the packet is greater than the space available in the @@ -197,8 +199,10 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(skb->data, payload_len, &iov_iter); - if (nbytes != payload_len) { + if (skb_copy_datagram_iter(skb, + offset, + &iov_iter, + payload_len)) { kfree_skb(skb); vq_err(vq, "Faulted on copying pkt buf\n"); break; @@ -212,13 +216,13 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, vhost_add_used(vq, head, sizeof(*hdr) + payload_len); added = true; - skb_pull(skb, payload_len); + VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len; total_len += payload_len; /* If we didn't send all the payload we can requeue the packet * to send it with the next available buffer. */ - if (skb->len > 0) { + if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) { hdr->flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same skb to handle diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..a91fbdf233e4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 352d042b130b..3e08d52a9355 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -364,9 +364,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; @@ -410,25 +411,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, while (total < len && !skb_queue_empty(&vvs->rx_queue)) { skb = skb_peek(&vvs->rx_queue); - bytes = len - total; - if (bytes > skb->len) - bytes = skb->len; + bytes = min_t(size_t, len - total, + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
*/ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, + VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; spin_lock_bh(&vvs->rx_lock); total += bytes; - skb_pull(skb, bytes); - if (skb->len == 0) { + VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; + + if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); virtio_transport_dec_rx_pkt(vvs, pkt_len); @@ -492,9 +495,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) return err; @@ -553,11 +557,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, int err; /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes_to_copy); + err = skb_copy_datagram_iter(skb, 0, + &msg->msg_iter, + bytes_to_copy); if (err) { /* Copy of message failed. Rest of * fragments will be freed without copy. -- cgit v1.2.3 From 64c99d2d6adac80cb17669736e32bdb331d68193 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:16 +0300 Subject: vsock/virtio: support to send non-linear skb For non-linear skb use its pages from fragment array as buffers in virtio tx queue. These pages are already pinned by 'get_user_pages()' during such skb creation. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport.c | 60 +++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e95df847176b..73d730156349 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -63,6 +63,17 @@ struct virtio_vsock { u32 guest_cid; bool seqpacket_allow; + + /* These fields are used only in tx path in function + * 'virtio_transport_send_pkt_work()', so to save + * stack space in it, place both of them here. Each + * pointer from 'out_sgs' points to the corresponding + * element in 'out_bufs' - this is initialized in + * 'virtio_vsock_probe()'. Both fields are protected + * by 'tx_lock'. +1 is needed for packet header. 
+ */ + struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1]; + struct scatterlist out_bufs[MAX_SKB_FRAGS + 1]; }; static u32 virtio_transport_get_local_cid(void) @@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { - struct scatterlist hdr, buf, *sgs[2]; int ret, in_sg = 0, out_sg = 0; + struct scatterlist **sgs; struct sk_buff *skb; bool reply; @@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work) virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); - - sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); - sgs[out_sg++] = &hdr; - if (skb->len > 0) { - sg_init_one(&buf, skb->data, skb->len); - sgs[out_sg++] = &buf; + sgs = vsock->out_sgs; + sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), + sizeof(*virtio_vsock_hdr(skb))); + out_sg++; + + if (!skb_is_nonlinear(skb)) { + if (skb->len > 0) { + sg_init_one(sgs[out_sg], skb->data, skb->len); + out_sg++; + } + } else { + struct skb_shared_info *si; + int i; + + /* If skb is nonlinear, then its buffer must contain + * only header and nothing more. Data is stored in + * the fragged part. + */ + WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); + + si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) { + skb_frag_t *skb_frag = &si->frags[i]; + void *va; + + /* We will use 'page_to_virt()' for the userspace page + * here, because virtio or dma-mapping layers will call + * 'virt_to_phys()' later to fill the buffer descriptor. + * We don't touch memory at "virtual" address of this page. + */ + va = page_to_virt(skb_frag->bv_page); + sg_init_one(sgs[out_sg], + va + skb_frag->bv_offset, + skb_frag->bv_len); + out_sg++; + } } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); @@ -621,6 +663,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) { struct virtio_vsock *vsock = NULL; int ret; + int i; ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); if (ret) @@ -663,6 +706,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev) if (ret < 0) goto out; + for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++) + vsock->out_sgs[i] = &vsock->out_bufs[i]; + rcu_assign_pointer(the_virtio_vsock, vsock); mutex_unlock(&the_virtio_vsock_mutex); -- cgit v1.2.3 From 4b0bf10eb077cb43c09746251ef3608d62c45667 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:17 +0300 Subject: vsock/virtio: non-linear skb handling for tap For tap device new skb is created and data from the current skb is copied to it. This adds copying data from non-linear skb to new the skb. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 3e08d52a9355..3a48e48a99ac 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -106,6 +106,27 @@ out: return NULL; } +static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, + void *dst, + size_t len) +{ + struct iov_iter iov_iter = { 0 }; + struct kvec kvec; + size_t to_copy; + + kvec.iov_base = dst; + kvec.iov_len = len; + + iov_iter.iter_type = ITER_KVEC; + iov_iter.kvec = &kvec; + iov_iter.nr_segs = 1; + + to_copy = min_t(size_t, len, skb->len); + + skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &iov_iter, to_copy); +} + /* Packet capture */ static struct sk_buff *virtio_transport_build_skb(void *opaque) { @@ -114,7 +135,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct af_vsockmon_hdr *hdr; struct sk_buff *skb; size_t payload_len; - void *payload_buf; /* A packet could be split to fit the RX buffer, so we can retrieve * the payload length from the header and the buffer pointer taking @@ -122,7 +142,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) */ pkt_hdr = virtio_vsock_hdr(pkt); payload_len = pkt->len; - payload_buf = pkt->data; skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, GFP_ATOMIC); @@ -165,7 +184,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); if (payload_len) { - skb_put_data(skb, payload_buf, payload_len); + if (skb_is_nonlinear(pkt)) { + void *data = skb_put(skb, payload_len); + + virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); + } else { + skb_put_data(skb, pkt->data, payload_len); + } } return skb; -- cgit v1.2.3 From 581512a6dc939ef122e49336626ae159f3b8a345 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:18 +0300 Subject: vsock/virtio: MSG_ZEROCOPY flag support This adds handling of MSG_ZEROCOPY flag on transmission path: 1) If this flag is set and zerocopy transmission is possible (enabled in socket options and transport allows zerocopy), then non-linear skb will be created and filled with the pages of user's buffer. Pages of user's buffer are locked in memory by 'get_user_pages()'. 2) Replaces way of skb owning: instead of 'skb_set_owner_sk_safe()' it calls 'skb_set_owner_w()'. Reason of this change is that '__zerocopy_sg_from_iter()' increments 'sk_wmem_alloc' of socket, so to decrease this field correctly, proper skb destructor is needed: 'sock_wfree()'. This destructor is set by 'skb_set_owner_w()'. 3) Adds new callback to 'struct virtio_transport': 'can_msgzerocopy'. If this callback is set, then transport needs extra check to be able to send provided number of buffers in zerocopy mode. Currently, the only transport that needs this callback set is virtio, because this transport adds new buffers to the virtio queue and we need to check, that number of these buffers is less than size of the queue (it is required by virtio spec). vhost and loopback transports don't need this check. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Signed-off-by: Paolo Abeni --- include/linux/virtio_vsock.h | 9 + .../trace/events/vsock_virtio_transport_common.h | 12 +- net/vmw_vsock/virtio_transport.c | 32 +++ net/vmw_vsock/virtio_transport_common.c | 250 ++++++++++++++++----- 4 files changed, 241 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index a91fbdf233e4..ebb3ce63d64d 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -160,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h index d0b3f0ea9ba1..f1ebe36787c3 100644 --- a/include/trace/events/vsock_virtio_transport_common.h +++ b/include/trace/events/vsock_virtio_transport_common.h @@ -43,7 +43,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __u32 len, __u16 type, __u16 op, - __u32 flags + __u32 flags, + bool zcopy ), TP_ARGS( src_cid, src_port, @@ -51,7 +52,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, len, type, op, - flags + flags, + zcopy ), TP_STRUCT__entry( __field(__u32, src_cid) @@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __field(__u16, type) __field(__u16, op) __field(__u32, flags) + __field(bool, zcopy) ), TP_fast_assign( __entry->src_cid = src_cid; @@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __entry->type = type; __entry->op = op; __entry->flags = flags; + __entry->zcopy = zcopy; ), - TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", + TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s", __entry->src_cid, __entry->src_port, __entry->dst_cid, __entry->dst_port, __entry->len, show_type(__entry->type), show_op(__entry->op), - __entry->flags) + __entry->flags, __entry->zcopy ? "true" : "false") ); TRACE_EVENT(virtio_transport_recv_pkt, diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 73d730156349..09ba3128e759 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -455,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_can_msgzerocopy(int bufs_num) +{ + struct virtio_vsock *vsock; + bool res = false; + + rcu_read_lock(); + + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) { + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + + /* Check that tx queue is large enough to keep whole + * data to send. This is needed, because when there is + * not enough free space in the queue, current skb to + * send will be reinserted to the head of tx list of + * the socket to retry transmission later, so if skb + * is bigger than whole queue, it will be reinserted + * again and again, thus blocking other skbs to be sent. + * Each page of the user provided buffer will be added + * as a single buffer to the tx virtqueue, so compare + * number of pages against maximum capacity of the queue. 
+ */ + if (bufs_num <= vq->num_max) + res = true; + } + + rcu_read_unlock(); + + return res; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -504,6 +535,7 @@ static struct virtio_transport virtio_transport = { }, .send_pkt = virtio_transport_send_pkt, + .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 3a48e48a99ac..e22c81435ef7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -37,73 +37,99 @@ virtio_transport_get_ops(struct vsock_sock *vsk) return container_of(t, struct virtio_transport, transport); } -/* Returns a new packet on success, otherwise returns NULL. - * - * If NULL is returned, errp is set to a negative errno. - */ -static struct sk_buff * -virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len; - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - void *payload; - int err; +static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, + struct virtio_vsock_pkt_info *info, + size_t pkt_len) +{ + struct iov_iter *iov_iter; - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); - if (!skb) - return NULL; + if (!info->msg) + return false; - hdr = virtio_vsock_hdr(skb); - hdr->type = cpu_to_le16(info->type); - hdr->op = cpu_to_le16(info->op); - hdr->src_cid = cpu_to_le64(src_cid); - hdr->dst_cid = cpu_to_le64(dst_cid); - hdr->src_port = cpu_to_le32(src_port); - hdr->dst_port = cpu_to_le32(dst_port); - hdr->flags = cpu_to_le32(info->flags); - hdr->len = cpu_to_le32(len); + iov_iter = &info->msg->msg_iter; - if (info->msg && len > 0) { - payload = skb_put(skb, len); - err = memcpy_from_msg(payload, info->msg, len); - if (err) - goto out; + if (iov_iter->iov_offset) + return false; - if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + /* We can't send whole iov. */ + if (iov_iter->count > pkt_len) + return false; - if (info->msg->msg_flags & MSG_EOR) - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - } + /* Check that transport can send data in zerocopy mode. */ + t_ops = virtio_transport_get_ops(info->vsk); + + if (t_ops->can_msgzerocopy) { + int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); + int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + + /* +1 is for packet header. 
*/ + return t_ops->can_msgzerocopy(pages_to_send + 1); } - if (info->reply) - virtio_vsock_skb_set_reply(skb); + return true; +} - trace_virtio_transport_alloc_pkt(src_cid, src_port, - dst_cid, dst_port, - len, - info->type, - info->op, - info->flags); +static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, + struct sk_buff *skb, + struct msghdr *msg, + bool zerocopy) +{ + struct ubuf_info *uarg; - if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { - WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); - goto out; + if (msg->msg_ubuf) { + uarg = msg->msg_ubuf; + net_zcopy_get(uarg); + } else { + struct iov_iter *iter = &msg->msg_iter; + struct ubuf_info_msgzc *uarg_zc; + + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + iter->count, + NULL); + if (!uarg) + return -1; + + uarg_zc = uarg_to_msgzc(uarg); + uarg_zc->zerocopy = zerocopy ? 1 : 0; } - return skb; + skb_zcopy_init(skb, uarg); -out: - kfree_skb(skb); - return NULL; + return 0; +} + +static int virtio_transport_fill_skb(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t len, + bool zcopy) +{ + if (zcopy) + return __zerocopy_sg_from_iter(info->msg, NULL, skb, + &info->msg->msg_iter, + len); + + return memcpy_from_msg(skb_put(skb, len), info->msg, len); +} + +static void virtio_transport_init_hdr(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t payload_len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_vsock_hdr *hdr; + + hdr = virtio_vsock_hdr(skb); + hdr->type = cpu_to_le16(info->type); + hdr->op = cpu_to_le16(info->op); + hdr->src_cid = cpu_to_le64(src_cid); + hdr->dst_cid = cpu_to_le64(dst_cid); + hdr->src_port = cpu_to_le32(src_port); + hdr->dst_port = cpu_to_le32(dst_port); + hdr->flags = cpu_to_le32(info->flags); + hdr->len = cpu_to_le32(payload_len); } static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, @@ -214,6 +240,82 @@ static u16 virtio_transport_get_type(struct sock *sk) return VIRTIO_VSOCK_TYPE_SEQPACKET; } +/* Returns new sk_buff on success, otherwise returns NULL. */ +static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, + size_t payload_len, + bool zcopy, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct vsock_sock *vsk; + struct sk_buff *skb; + size_t skb_len; + + skb_len = VIRTIO_VSOCK_SKB_HEADROOM; + + if (!zcopy) + skb_len += payload_len; + + skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); + if (!skb) + return NULL; + + virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, + dst_cid, dst_port); + + vsk = info->vsk; + + /* If 'vsk' != NULL then payload is always present, so we + * will never call '__zerocopy_sg_from_iter()' below without + * setting skb owner in 'skb_set_owner_w()'. The only case + * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message + * without payload. + */ + WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); + + /* Set owner here, because '__zerocopy_sg_from_iter()' uses + * owner of skb without check to update 'sk_wmem_alloc'. 
+ */ + if (vsk) + skb_set_owner_w(skb, sk_vsock(vsk)); + + if (info->msg && payload_len > 0) { + int err; + + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); + if (err) + goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } + } + + if (info->reply) + virtio_vsock_skb_set_reply(skb); + + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + payload_len, + info->type, + info->op, + info->flags, + zcopy); + + return skb; +out: + kfree_skb(skb); + return NULL; +} + /* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * @@ -222,10 +324,12 @@ static u16 virtio_transport_get_type(struct sock *sk) static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { + u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; u32 pkt_len = info->pkt_len; + bool can_zcopy = false; u32 rest_len; int ret; @@ -254,15 +358,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len; + if (info->msg) { + /* If zerocopy is not enabled by 'setsockopt()', we behave as + * there is no MSG_ZEROCOPY flag set. + */ + if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) + info->msg->msg_flags &= ~MSG_ZEROCOPY; + + if (info->msg->msg_flags & MSG_ZEROCOPY) + can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); + + if (can_zcopy) + max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, + (MAX_SKB_FRAGS * PAGE_SIZE)); + } + rest_len = pkt_len; do { struct sk_buff *skb; size_t skb_len; - skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len); + skb_len = min(max_skb_len, rest_len); - skb = virtio_transport_alloc_skb(info, skb_len, + skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -270,6 +389,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } + /* We process buffer part by part, allocating skb on + * each iteration. If this is last skb for this buffer + * and MSG_ZEROCOPY mode is in use - we must allocate + * completion for the current syscall. + */ + if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && + skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { + if (virtio_transport_init_zcopy_skb(vsk, skb, + info->msg, + can_zcopy)) { + ret = -ENOMEM; + break; + } + } + virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb); @@ -985,7 +1119,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, + reply = virtio_transport_alloc_skb(&info, 0, false, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), -- cgit v1.2.3 From 5b43bd71f4942afa79b0683f4f41b1d47a21a9c7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2023 10:52:05 +0100 Subject: wifi: cfg80211: make read-only array centers_80mhz static const Don't populate the read-only array lanes on the stack, instead make it static const. 
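(Editorial aside, not part of the patch: a minimal standalone sketch of the pattern this change applies. With an automatic array the initializer is copied onto the stack on every call; with static const the table is emitted once into read-only data. uint16_t from <stdint.h> stands in for the kernel's u16 so the sketch compiles on its own.)

#include <stdint.h>

/* Rebuilt on the stack each time the function runs. */
static uint16_t center_before(int idx)
{
	uint16_t centers[] = { 5210, 5290, 5530, 5610, 5690, 5775 };

	return centers[idx];
}

/* Emitted once into read-only storage; no per-call copy. */
static uint16_t center_after(int idx)
{
	static const uint16_t centers[] = { 5210, 5290, 5530, 5610, 5690, 5775 };

	return centers[idx];
}

int main(void)
{
	return center_before(0) == center_after(0) ? 0 : 1;
}
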
Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20230919095205.24949-1-colin.i.king@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index ba14f570cda7..f3fd66d30b84 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { /* IEEE802.11ac-2013 Table E-4 */ - u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; + static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(&sta->deflink); -- cgit v1.2.3 From 6b348f6e34ce7dc5eb68066377d5e38780ce4095 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2023 08:40:52 +0200 Subject: wifi: mac80211: ethtool: always hold wiphy mutex Drivers should really be able to rely on the wiphy mutex being held all the time, unless otherwise documented. For ethtool, that wasn't quite right. Fix and clarify this in both code and documentation. Reported-by: syzbot+c12a771b218dcbba32e1@syzkaller.appspotmail.com Fixes: 0e8185ce1dde ("wifi: mac80211: check wiphy mutex in ops") Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/ethtool.c | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8d993f6ab919..0f6390865fe7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4067,11 +4067,15 @@ struct ieee80211_prep_tx_info { * This callback must be atomic. * * @get_et_sset_count: Ethtool API to get string-set count. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @get_et_stats: Ethtool API to get a set of u64 stats. * * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @mgd_prepare_tx: Prepare for transmitting a management frame for association * before associated. 
In multi-channel scenarios, a virtual interface is diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 9894d2024470..99f6174a9d69 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -19,11 +19,16 @@ static int ieee80211_set_ringparam(struct net_device *dev, struct netlink_ext_ack *extack) { struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); + int ret; if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) return -EINVAL; - return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); + wiphy_lock(local->hw.wiphy); + ret = drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); + wiphy_unlock(local->hw.wiphy); + + return ret; } static void ieee80211_get_ringparam(struct net_device *dev, @@ -35,8 +40,10 @@ static void ieee80211_get_ringparam(struct net_device *dev, memset(rp, 0, sizeof(*rp)); + wiphy_lock(local->hw.wiphy); drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending, &rp->rx_pending, &rp->rx_max_pending); + wiphy_unlock(local->hw.wiphy); } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { -- cgit v1.2.3 From bb55441c57ccc5cc2eab44e1a97698b9d708871d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 08:56:38 +0200 Subject: wifi: cfg80211: split struct cfg80211_ap_settings Using the full struct cfg80211_ap_settings for an update is misleading, since most settings cannot be updated. Split the update case off into a new struct cfg80211_ap_update. Change-Id: I3ba4dd9280938ab41252f145227a7005edf327e4 Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 2 +- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 2 +- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +- include/net/cfg80211.h | 18 +++++++++++++++++- net/mac80211/cfg.c | 2 +- net/wireless/nl80211.c | 19 +++++++++---------- net/wireless/rdev-ops.h | 2 +- net/wireless/trace.h | 2 +- 12 files changed, 36 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index eea60e2fca44..e37db4af33de 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2954,7 +2954,7 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, } static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *params) + struct cfg80211_ap_update *params) { struct ath6kl_vif *vif = netdev_priv(dev); diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index dfbb478ae274..dbe4b3478f03 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2082,7 +2082,7 @@ void wil_cfg80211_ap_recovery(struct wil6210_priv *wil) static int wil_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_ap_settings *params) + struct cfg80211_ap_update *params) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); struct wireless_dev *wdev = ndev->ieee80211_ptr; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 9012456e1a18..667462369a32 100644 --- 
a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5415,7 +5415,7 @@ static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev, static s32 brcmf_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_ap_settings *info) + struct cfg80211_ap_update *info) { struct brcmf_if *ifp = netdev_priv(ndev); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 70473be42d7b..7a15ea8072e6 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1835,7 +1835,7 @@ static int mwifiex_cfg80211_set_cqm_rssi_config(struct wiphy *wiphy, */ static int mwifiex_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *params) + struct cfg80211_ap_update *params) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); struct mwifiex_adapter *adapter = priv->adapter; diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 3447470d3d02..da52f91693b5 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1441,7 +1441,7 @@ static int start_ap(struct wiphy *wiphy, struct net_device *dev, } static int change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *params) + struct cfg80211_ap_update *params) { struct wilc_vif *vif = netdev_priv(dev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 9388adcdcac1..663d77770fce 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -331,7 +331,7 @@ out: } static int qtnf_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *info) + struct cfg80211_ap_update *info) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 5ddc2d9a6060..1e683212027c 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -2319,7 +2319,7 @@ static int cfg80211_rtw_start_ap(struct wiphy *wiphy, struct net_device *ndev, } static int cfg80211_rtw_change_beacon(struct wiphy *wiphy, struct net_device *ndev, - struct cfg80211_ap_settings *info) + struct cfg80211_ap_update *info) { struct adapter *adapter = rtw_netdev_priv(ndev); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9af714431b22..899e9ffa6048 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1480,6 +1480,22 @@ struct cfg80211_ap_settings { u16 punct_bitmap; }; + +/** + * struct cfg80211_ap_update - AP configuration update + * + * Subset of &struct cfg80211_ap_settings, for updating a running AP. 
+ * + * @beacon: beacon data + * @fils_discovery: FILS discovery transmission parameters + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + */ +struct cfg80211_ap_update { + struct cfg80211_beacon_data beacon; + struct cfg80211_fils_discovery fils_discovery; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; +}; + /** * struct cfg80211_csa_settings - channel switch settings * @@ -4523,7 +4539,7 @@ struct cfg80211_ops { int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *info); + struct cfg80211_ap_update *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e751d4eba8f5..e1a64a154287 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1477,7 +1477,7 @@ error: } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_ap_settings *params) + struct cfg80211_ap_update *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e64bf2a58b36..cbdf635e6025 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5636,11 +5636,10 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs, static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, struct nlattr *attrs, - struct cfg80211_ap_settings *params) + struct cfg80211_fils_discovery *fd) { struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1]; int ret; - struct cfg80211_fils_discovery *fd = ¶ms->fils_discovery; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_FILS_DISCOVERY)) @@ -5674,12 +5673,10 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, static int nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, struct nlattr *attrs, - struct cfg80211_ap_settings *params) + struct cfg80211_unsol_bcast_probe_resp *presp) { struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1]; int ret; - struct cfg80211_unsol_bcast_probe_resp *presp = - ¶ms->unsol_bcast_probe_resp; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP)) @@ -6122,7 +6119,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], - params); + ¶ms->fils_discovery); if (err) goto out; } @@ -6130,7 +6127,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { err = nl80211_parse_unsol_bcast_probe_resp( rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], - params); + ¶ms->unsol_bcast_probe_resp); if (err) goto out; } @@ -6202,7 +6199,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_ap_settings *params; + struct cfg80211_ap_update *params; struct nlattr *attr; int err; @@ -6227,14 +6224,16 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; if (attr) { - err = nl80211_parse_fils_discovery(rdev, attr, params); + err = 
nl80211_parse_fils_discovery(rdev, attr, + ¶ms->fils_discovery); if (err) goto out; } attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]; if (attr) { - err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr, params); + err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr, + ¶ms->unsol_bcast_probe_resp); if (err) goto out; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index c6a2c07e380b..2214a90cf101 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -173,7 +173,7 @@ static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ap_settings *info) + struct cfg80211_ap_update *info) { int ret; trace_rdev_change_beacon(&rdev->wiphy, dev, info); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 1557dc1d58e2..da2b73951c32 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -615,7 +615,7 @@ TRACE_EVENT(rdev_start_ap, TRACE_EVENT(rdev_change_beacon, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_ap_settings *info), + struct cfg80211_ap_update *info), TP_ARGS(wiphy, netdev, info), TP_STRUCT__entry( WIPHY_ENTRY -- cgit v1.2.3 From b3239498353484fd6ddeb513df89c4628cd623d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:12 +0300 Subject: wifi: mac80211: use bandwidth indication element for CSA In CSA, parse the (EHT) bandwidth indication element and use it (in fact prefer it if present). Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.43ef01920556.If4f24a61cd634ab1e50eba43899b9e992bf25602@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 23 +++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 5 +++-- net/mac80211/spectmgmt.c | 13 +++++++++++-- net/mac80211/util.c | 42 +++++++++++++++++++++++++++++------------- 5 files changed, 68 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 340d7e0f6bf7..f11b7022d9eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3139,6 +3139,28 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3596,6 +3618,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d5c5f865323c..e7856336b5c6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1677,6 +1677,7 @@ struct ieee802_11_elems { const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; + const struct ieee80211_bandwidth_indication 
*bandwidth_indication; /* length of them, respectively */ u8 ext_capab_len; @@ -2463,7 +2464,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e8f16ed235c3..a211f594f25a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -109,7 +109,8 @@ ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper, return 0; /* set 160/320 supported to get the full AP definition */ - ieee80211_chandef_eht_oper(eht_oper, true, true, &ap_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + true, true, &ap_chandef); ap_center_freq = ap_chandef.center_freq1; ap_bw = 20 * BIT(u8_get_bits(info->control, IEEE80211_EHT_OPER_CHAN_WIDTH)); @@ -387,7 +388,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { struct cfg80211_chan_def eht_chandef = *chandef; - ieee80211_chandef_eht_oper(eht_oper, + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, eht_chandef.width == NL80211_CHAN_WIDTH_160, false, &eht_chandef); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 871cdac2d0f4..55959b0b24c5 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg - * Copyright (C) 2018, 2020, 2022 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation */ #include @@ -33,12 +33,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_bandwidth_indication *bwi; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + bwi = elems->bandwidth_indication; if (conn_flags & (IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ)) { @@ -132,7 +134,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, break; } - if (wide_bw_chansw_ie) { + if (bwi) { + /* start with the CSA one */ + new_vht_chandef = csa_ie->chandef; + /* and update the width accordingly */ + /* FIXME: support 160/320 */ + ieee80211_chandef_eht_oper(&bwi->info, true, true, + &new_vht_chandef); + } else if (wide_bw_chansw_ie) { u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; struct ieee80211_vht_operation vht_oper = { .chan_width = diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 88f714a75862..a1e18938ce52 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -990,6 +990,11 @@ ieee80211_parse_extension_element(u32 *crc, } } break; + case WLAN_EID_EXT_BANDWIDTH_INDICATION: + if (ieee80211_bandwidth_indication_size_ok(data, len)) + elems->bandwidth_indication = data; + calc_crc = true; + break; } if (crc && calc_crc) @@ -1005,11 +1010,11 @@ _ieee802_11_parse_elems_full(struct 
ieee80211_elems_parse_params *params, bool calc_crc = params->filter != 0; DECLARE_BITMAP(seen_elems, 256); u32 crc = params->crc; - const u8 *ie; bitmap_zero(seen_elems, 256); for_each_element(elem, params->start, params->len) { + const struct element *subelem; bool elem_parse_failed; u8 id = elem->id; u8 elen = elem->datalen; @@ -1267,15 +1272,27 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, } /* * This is a bit tricky, but as we only care about - * the wide bandwidth channel switch element, so - * just parse it out manually. + * a few elements, parse them out manually. */ - ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, - pos, elen); - if (ie) { - if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) + subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (subelem) { + if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = - (void *)(ie + 2); + (void *)subelem->data; + else + elem_parse_failed = true; + } + + subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, + pos, elen); + if (subelem) { + const void *edata = subelem->data + 1; + u8 edatalen = subelem->datalen - 1; + + if (ieee80211_bandwidth_indication_size_ok(edata, + edatalen)) + elems->bandwidth_indication = edata; else elem_parse_failed = true; } @@ -3746,12 +3763,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, return true; } -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef) { - struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional; - chandef->center_freq1 = ieee80211_channel_to_frequency(info->ccfs0, chandef->chan->band); @@ -3920,8 +3935,9 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, support_320 = eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; - ieee80211_chandef_eht_oper(eht_oper, support_160, - support_320, &he_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + support_160, support_320, + &he_chandef); } if (!cfg80211_chandef_valid(&he_chandef)) { -- cgit v1.2.3 From 2bf57b00abecb2646bb3a387cfc9e6980658cdb5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 20 Sep 2023 21:25:13 +0300 Subject: wifi: mac80211: update the rx_chains after set_antenna() rx_chains was set only upon registration and it we rely on it for the active chains upon SMPS configuration after association. When we use the set_antenna() API to limit the rx_chains from 2 to 1, this caused issues with iwlwifi since we still had 2 active_chains requested. 
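(Editorial aside, not part of the patch: a small standalone sketch of the relation the fix relies on. The active receive chain count is simply the population count of the antenna bitmask, so restricting rx_ant from 0x3 to 0x1 should take rx_chains from 2 to 1; __builtin_popcount stands in for the kernel's hweight8().)

#include <stdio.h>

static int rx_chains_for(unsigned int rx_ant)
{
	/* hweight8() equivalent: count the set antenna bits. */
	return __builtin_popcount(rx_ant & 0xff);
}

int main(void)
{
	printf("rx_ant 0x3 -> %d chains\n", rx_chains_for(0x3)); /* 2 */
	printf("rx_ant 0x1 -> %d chains\n", rx_chains_for(0x1)); /* 1 */
	return 0;
}
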
Signed-off-by: Emmanuel Grumbach Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.2dde4da246b2.I904223c868c77cf2ba132a3088fe6506fcbb443b@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e1a64a154287..5bc6b1329465 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4044,11 +4044,17 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); + int ret; if (local->started) return -EOPNOTSUPP; - return drv_set_antenna(local, tx_ant, rx_ant); + ret = drv_set_antenna(local, tx_ant, rx_ant); + if (ret) + return ret; + + local->rx_chains = hweight8(rx_ant); + return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) -- cgit v1.2.3 From c09c4f31998bac6d73508e38812518aceb069b68 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 20 Sep 2023 21:25:14 +0300 Subject: wifi: mac80211: don't connect to an AP while it's in a CSA process Connection to an AP that is running a CSA flow may end up with a failure as the AP might change its channel during the connection flow while we do not track the channel change yet. Avoid that by rejecting a connection to such an AP. Signed-off-by: Ayala Beker Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.e5001a762a4a.I9745c695f3403b259ad000ce94110588a836c04a@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a211f594f25a..a85873f305bf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7015,6 +7015,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_link_data *link; + const struct element *csa_elem, *ecsa_elem; u16 auth_alg; int err; bool cont_auth; @@ -7057,6 +7058,22 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data) return -EBUSY; + rcu_read_lock(); + csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH); + ecsa_elem = ieee80211_bss_get_elem(req->bss, + WLAN_EID_EXT_CHANSWITCH_ANN); + if ((csa_elem && + csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) && + ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) || + (ecsa_elem && + ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) && + ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) { + rcu_read_unlock(); + sdata_info(sdata, "AP is in CSA process, reject auth\n"); + return -EINVAL; + } + rcu_read_unlock(); + auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); if (!auth_data) @@ -7364,7 +7381,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data; - const struct element *ssid_elem; + const struct element *ssid_elem, *csa_elem, *ecsa_elem; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; ieee80211_conn_flags_t conn_flags = 0; struct ieee80211_link_data *link; @@ -7394,6 +7411,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, 
kfree(assoc_data); return -EINVAL; } + + csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH); + ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN); + if ((csa_elem && + csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) && + ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) || + (ecsa_elem && + ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) && + ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) { + sdata_info(sdata, "AP is in CSA process, reject assoc\n"); + rcu_read_unlock(); + kfree(assoc_data); + return -EINVAL; + } + memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen); assoc_data->ssid_len = ssid_elem->datalen; memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len); -- cgit v1.2.3 From 87cd646f615ce152952b3bbd0c0b65863b7ff7ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:17 +0300 Subject: wifi: cfg80211: reg: describe return values in kernel-doc Describe the function return values in kernel-doc. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.8b1e45c8bab8.I6dbae4f6dfe8f5352bc44565cc5131e73dd1873f@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index eb2fa97457b4..2ef4f6cc7a32 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1283,7 +1283,9 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". - **/ + * + * Returns: whether or not the frequency is in the range + */ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, u32 freq_khz) { @@ -1492,6 +1494,8 @@ static void add_rule(struct ieee80211_reg_rule *rule, * Returns a pointer to the regulatory domain structure which will hold the * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. + * + * Returns: the intersected regdomain */ static struct ieee80211_regdomain * regdom_intersect(const struct ieee80211_regdomain *rd1, @@ -2673,6 +2677,9 @@ static void reg_set_request_processed(void) * * The wireless subsystem can use this function to process * a regulatory request issued by the regulatory core. + * + * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the + * hint was processed or ignored */ static enum reg_request_treatment reg_process_hint_core(struct regulatory_request *core_request) @@ -2729,6 +2736,9 @@ __reg_process_hint_user(struct regulatory_request *user_request) * * The wireless subsystem can use this function to process * a regulatory request initiated by userspace. + * + * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the + * hint was processed or ignored */ static enum reg_request_treatment reg_process_hint_user(struct regulatory_request *user_request) @@ -2784,7 +2794,7 @@ __reg_process_hint_driver(struct regulatory_request *driver_request) * The wireless subsystem can use this function to process * a regulatory request issued by an 802.11 driver. * - * Returns one of the different reg request treatment values. + * Returns: one of the different reg request treatment values. 
*/ static enum reg_request_treatment reg_process_hint_driver(struct wiphy *wiphy, @@ -2888,7 +2898,7 @@ __reg_process_hint_country_ie(struct wiphy *wiphy, * The wireless subsystem can use this function to process * a regulatory request issued by a country Information Element. * - * Returns one of the different reg request treatment values. + * Returns: one of the different reg request treatment values. */ static enum reg_request_treatment reg_process_hint_country_ie(struct wiphy *wiphy, -- cgit v1.2.3 From cef7104720cc1d4af238e8507a98e116c4b78ba2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:18 +0300 Subject: wifi: mac80211: describe return values in kernel-doc Add descriptions for two return values for two functions that are missing them. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.79307c341723.Ibae386f0354f2e215d4955752ac378acc2466b51@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 ++ net/mac80211/util.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9b845fbf923c..932516f8cc13 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4474,6 +4474,8 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev, * @dev: incoming interface * * On failure skb will be freed. + * + * Returns: the netdev TX status (but really only %NETDEV_TX_OK) */ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a1e18938ce52..97c5823da0eb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4146,6 +4146,8 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs) * This function calculates the RX timestamp at the given MPDU offset, taking * into account what the RX timestamp was. An offset of 0 will just normalize * the timestamp to TSF at beginning of MPDU reception. + * + * Returns: the calculated timestamp */ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, -- cgit v1.2.3 From 041a74cbe49048abc3adb6129141256e50b84b6e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 20 Sep 2023 21:25:22 +0300 Subject: wifi: mac80211: Notify the low level driver on change in MLO valid links Notify the low level driver when there is change in the valid links. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.4fc85b0a51b0.I64238e0e892709a2bd4764b3bca93cdcf021e2fd@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/main.c | 3 ++- net/mac80211/mlme.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6dbe775b9e87..d4ef2a605cb4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -341,6 +341,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. + * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. 
*/ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -376,6 +377,7 @@ enum ieee80211_bss_change { BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), + BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), /* when adding here, make sure to change ieee80211_reconfig */ }; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index bf8f72c412ee..b46f4d733c5d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -207,7 +207,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) BSS_CHANGED_PS |\ BSS_CHANGED_IBSS |\ BSS_CHANGED_ARP_FILTER |\ - BSS_CHANGED_SSID) + BSS_CHANGED_SSID |\ + BSS_CHANGED_MLD_VALID_LINKS) void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a85873f305bf..085efae8e23c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5713,6 +5713,8 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, if (ret) sdata_info(sdata, "Failed setting valid links\n"); + ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_VALID_LINKS); + out: if (!ret) cfg80211_links_removed(sdata->dev, sdata->u.mgd.removed_links); -- cgit v1.2.3 From 62e9c64eedfeb697ba28081ccaac59a45f9a96e1 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 20 Sep 2023 21:25:24 +0300 Subject: wifi: mac80211: add support for parsing TID to Link mapping element Add the relevant definitions for TID to Link mapping element according to the P802.11be_D4.0. Signed-off-by: Ayala Beker Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.9ea9b0b4412a.I2281ab2c70e8b43a39032dc115db6a80f1f0b3f4@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/util.c | 8 +++++++ 3 files changed, 69 insertions(+) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f11b7022d9eb..f2965ff3d7c1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1246,6 +1246,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -3618,6 +3642,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; @@ -5155,6 +5180,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + 
+ if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e7856336b5c6..d1a73095c914 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1678,6 +1678,7 @@ struct ieee802_11_elems { const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; const struct ieee80211_bandwidth_indication *bandwidth_indication; + const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; /* length of them, respectively */ u8 ext_capab_len; @@ -1711,6 +1712,8 @@ struct ieee802_11_elems { /* The reconfiguration Multi-Link element in the original IEs */ const struct element *ml_reconf_elem; + u8 ttlm_num; + /* * store the per station profile pointer and length in case that the * parsing also handled Multi-Link element parsing for a specific link diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 97c5823da0eb..98a3bffc6991 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -995,6 +995,14 @@ ieee80211_parse_extension_element(u32 *crc, elems->bandwidth_indication = data; calc_crc = true; break; + case WLAN_EID_EXT_TID_TO_LINK_MAPPING: + calc_crc = true; + if (ieee80211_tid_to_link_map_size_ok(data, len) && + elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { + elems->ttlm[elems->ttlm_num] = (void *)data; + elems->ttlm_num++; + } + break; } if (crc && calc_crc) -- cgit v1.2.3 From 702e80470a3359ce02b3f846f48f6db4ac7fd837 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 20 Sep 2023 21:25:25 +0300 Subject: wifi: mac80211: support handling of advertised TID-to-link mapping Support handling of advertised TID-to-link mapping elements received in a beacon. These elements are used by AP MLD to disable specific links and force all clients to stop using these links. By default if no TID-to-link mapping is advertised, all TIDs shall be mapped to all links. 
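(Editorial aside, not part of the patch: a rough standalone sketch of how an advertised mapping is expected to split the set of setup links, using purely hypothetical bitmask values. Links present in the advertised map stay active, the remaining setup links become dormant; with no element present the map is effectively all-ones, so every setup link stays usable.)

#include <stdio.h>

int main(void)
{
	unsigned int valid_links = 0x7;	/* hypothetical: links 0, 1 and 2 are set up */
	unsigned int ttlm_map = 0x5;	/* hypothetical advertised map: links 0 and 2 usable */
	unsigned int active = ttlm_map & valid_links;
	unsigned int dormant = ~ttlm_map & valid_links;

	printf("active 0x%x, dormant 0x%x\n", active, dormant);	/* active 0x5, dormant 0x2 */
	return 0;
}
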
Signed-off-by: Ayala Beker Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.623c4b692ff9.Iab0a6f561d85b8ab6efe541590985a2b6e9e74aa@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 15 ++++ net/mac80211/mlme.c | 196 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d1a73095c914..19429f84afc3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -467,6 +467,17 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; +/* Advertised TID-to-link mapping info */ +struct ieee80211_adv_ttlm_info { + /* time in TUs at which the new mapping is established, or 0 if there is + * no planned advertised TID-to-link mapping + */ + u16 switch_time; + u32 duration; /* duration of the planned T2L map in TUs */ + u16 map; /* map of usable links for all TIDs */ + bool active; /* whether the advertised mapping is active or not */ +}; + DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { @@ -560,6 +571,10 @@ struct ieee80211_if_managed { struct wiphy_delayed_work ml_reconf_work; u16 removed_links; + + /* TID-to-link mapping support */ + struct wiphy_delayed_work ttlm_work; + struct ieee80211_adv_ttlm_info ttlm_info; }; struct ieee80211_if_ibss { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 085efae8e23c..663ea7430b73 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3053,6 +3053,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(sdata->vif.bss_conf.tx_pwr_env, 0, sizeof(sdata->vif.bss_conf.tx_pwr_env)); + memset(&sdata->u.mgd.ttlm_info, 0, + sizeof(sdata->u.mgd.ttlm_info)); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); ieee80211_vif_set_links(sdata, 0, 0); } @@ -5821,6 +5824,194 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, TU_TO_JIFFIES(delay)); } +static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + u16 new_active_links, new_dormant_links; + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.ttlm_work.work); + int ret; + + new_active_links = sdata->u.mgd.ttlm_info.map & + sdata->vif.valid_links; + new_dormant_links = ~sdata->u.mgd.ttlm_info.map & + sdata->vif.valid_links; + if (!new_active_links) { + ieee80211_disconnect(&sdata->vif, false); + return; + } + + ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0); + new_active_links = BIT(ffs(new_active_links) - 1); + ieee80211_set_active_links(&sdata->vif, new_active_links); + + ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links, + new_dormant_links); + + sdata->u.mgd.ttlm_info.active = true; + sdata->u.mgd.ttlm_info.switch_time = 0; + + if (!ret) + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_MLD_VALID_LINKS); +} + +static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data) +{ + if (bm_size == 1) + return *data; + else + return get_unaligned_le16(data); +} + +static int +ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_ttlm_elem *ttlm, + struct ieee80211_adv_ttlm_info *ttlm_info) +{ + /* The element size was already validated in + * ieee80211_tid_to_link_map_size_ok() + */ + u8 control, link_map_presence, map_size, tid; + u8 *pos; + + memset(ttlm_info, 0, sizeof(*ttlm_info)); + pos = (void *)ttlm->optional; + control = ttlm->control; + + if ((control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) || + 
!(control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT)) + return 0; + + if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) != + IEEE80211_TTLM_DIRECTION_BOTH) { + sdata_info(sdata, "Invalid advertised T2L map direction\n"); + return -EINVAL; + } + + link_map_presence = *pos; + pos++; + + ttlm_info->switch_time = get_unaligned_le16(pos); + pos += 2; + + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) { + ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16; + pos += 3; + } + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + map_size = 1; + else + map_size = 2; + + /* According to Draft P802.11be_D3.0 clause 35.3.7.1.7, an AP MLD shall + * not advertise a TID-to-link mapping that does not map all TIDs to the + * same link set, reject frame if not all links have mapping + */ + if (link_map_presence != 0xff) { + sdata_info(sdata, + "Invalid advertised T2L mapping presence indicator\n"); + return -EINVAL; + } + + ttlm_info->map = ieee80211_get_ttlm(map_size, pos); + if (!ttlm_info->map) { + sdata_info(sdata, + "Invalid advertised T2L map for TID 0\n"); + return -EINVAL; + } + + pos += map_size; + + for (tid = 1; tid < 8; tid++) { + u16 map = ieee80211_get_ttlm(map_size, pos); + + if (map != ttlm_info->map) { + sdata_info(sdata, "Invalid advertised T2L map for tid %d\n", + tid); + return -EINVAL; + } + + pos += map_size; + } + return 0; +} + +static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems, + u64 beacon_ts) +{ + u8 i; + int ret; + + if (!ieee80211_vif_is_mld(&sdata->vif)) + return; + + if (!elems->ttlm_num) { + if (sdata->u.mgd.ttlm_info.switch_time) { + /* if a planned TID-to-link mapping was cancelled - + * abort it + */ + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work); + } else if (sdata->u.mgd.ttlm_info.active) { + /* if no TID-to-link element, set to default mapping in + * which all TIDs are mapped to all setup links + */ + ret = ieee80211_vif_set_links(sdata, + sdata->vif.valid_links, + 0); + if (ret) { + sdata_info(sdata, "Failed setting valid/dormant links\n"); + return; + } + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_MLD_VALID_LINKS); + } + memset(&sdata->u.mgd.ttlm_info, 0, + sizeof(sdata->u.mgd.ttlm_info)); + return; + } + + for (i = 0; i < elems->ttlm_num; i++) { + struct ieee80211_adv_ttlm_info ttlm_info; + u32 res; + + res = ieee80211_parse_adv_t2l(sdata, elems->ttlm[i], + &ttlm_info); + + if (res) { + __ieee80211_disconnect(sdata); + return; + } + + if (ttlm_info.switch_time) { + u32 st_us, delay = 0; + u32 ts_l26 = beacon_ts & GENMASK(25, 0); + + /* The t2l map switch time is indicated with a partial + * TSF value, convert it to TSF and calc the delay + * to the start time. 
+ */ + st_us = ieee80211_tu_to_usec(ttlm_info.switch_time); + if (st_us > ts_l26) + delay = st_us - ts_l26; + else + continue; + + sdata->u.mgd.ttlm_info = ttlm_info; + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work, + usecs_to_jiffies(delay)); + return; + } + } +} + static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_hdr *hdr, size_t len, struct ieee80211_rx_status *rx_status) @@ -6144,6 +6335,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } ieee80211_ml_reconfiguration(sdata, elems); + ieee80211_process_adv_ttlm(sdata, elems, + le64_to_cpu(mgmt->u.beacon.timestamp)); ieee80211_link_info_change_notify(sdata, link, changed); free: @@ -6766,6 +6959,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); + wiphy_delayed_work_init(&ifmgd->ttlm_work, + ieee80211_tid_to_link_map_work); ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; @@ -7840,6 +8035,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) &ifmgd->tdls_peer_del_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ml_reconf_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT); -- cgit v1.2.3 From ef246a1480cc484cd2aeda75737cb0848616ddf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:26 +0300 Subject: wifi: mac80211: support antenna control in injection Support antenna control for injection by parsing the antenna radiotap field (which may be presented multiple times) and telling the driver about the resulting antenna bitmap. Of course there's no guarantee the driver will actually honour this, just like any other injection control. If misconfigured, i.e. the injected HT/VHT MCS needs more chains than antennas are configured, the bitmap is reset to zero, indicating no selection. For now this is only set up for two anntenas so we keep more free bits, but that can be trivially extended if any driver implements support for it that can deal with hardware with more antennas. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.f71001aa4da9.I00ccb762a806ea62bc3d728fa3a0d29f4f285eeb@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ include/net/mac80211.h | 6 +++++- net/mac80211/tx.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f2965ff3d7c1..3b02f038d509 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1705,6 +1705,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 
1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4ef2a605cb4..72375eceb786 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1178,7 +1178,11 @@ struct ieee80211_tx_info { u8 use_cts_prot:1; u8 short_preamble:1; u8 skip_table:1; - /* 2 bytes free */ + + /* for injection only (bitmap) */ + u8 antennas:2; + + /* 14 bits free */ }; /* only needed before rate control */ unsigned long jiffies; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 932516f8cc13..a984fc54644e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2162,6 +2162,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, rate_found = true; break; + case IEEE80211_RADIOTAP_ANTENNA: + /* this can appear multiple times, keep a bitmap */ + info->control.antennas |= BIT(*iterator.this_arg); + break; + case IEEE80211_RADIOTAP_DATA_RETRIES: rate_retries = *iterator.this_arg; break; @@ -2256,8 +2261,17 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } if (rate_flags & IEEE80211_TX_RC_MCS) { + /* reset antennas if not enough */ + if (IEEE80211_HT_MCS_CHAINS(rate) > + hweight8(info->control.antennas)) + info->control.antennas = 0; + info->control.rates[0].idx = rate; } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + /* reset antennas if not enough */ + if (vht_nss > hweight8(info->control.antennas)) + info->control.antennas = 0; + ieee80211_rate_set_vht(info->control.rates, vht_mcs, vht_nss); } else if (sband) { -- cgit v1.2.3 From a7b2cc591d556eca044c823e4e92d8c1cb430ef7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Sep 2023 21:25:27 +0300 Subject: wifi: cfg80211: report per-link errors during association When one of the links (other than the assoc_link) is misconfigured and cannot work the association will fail. However, userspace was not able to tell that the operation only failed because of a problem with one of the links. Fix this, by allowing the driver to set a per-link error code and reporting the (first) offending link by setting the bad_attr accordingly. This only allows us to report the first error, but that is sufficient for userspace to e.g. remove the offending link and retry. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.ebe63c0bd513.I40799998f02bf987acee1501a2522dc98bb6eb5a@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 899e9ffa6048..34c50f7273d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2980,12 +2980,15 @@ struct cfg80211_auth_request { * @elems_len: length of the elements * @disabled: If set this link should be included during association etc. but it * should not be used until enabled by the AP MLD. + * @error: per-link error code, must be <= 0. If there is an error, then the + * operation as a whole must fail. 
*/ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; bool disabled; + int error; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cbdf635e6025..87b21c0c0f25 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10941,8 +10941,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, req.ie, req.ie_len)) { - GENL_SET_ERR_MSG(info, - "non-inheritance makes no sense"); + NL_SET_ERR_MSG_ATTR(info->extack, + info->attrs[NL80211_ATTR_IE], + "non-inheritance makes no sense"); return -EINVAL; } } @@ -11067,6 +11068,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (!attrs[NL80211_ATTR_MLO_LINK_ID]) { err = -EINVAL; + NL_SET_BAD_ATTR(info->extack, link); goto free; } @@ -11074,6 +11076,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) /* cannot use the same link ID again */ if (req.links[link_id].bss) { err = -EINVAL; + NL_SET_BAD_ATTR(info->extack, link); goto free; } req.links[link_id].bss = @@ -11081,6 +11084,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(req.links[link_id].bss)) { err = PTR_ERR(req.links[link_id].bss); req.links[link_id].bss = NULL; + NL_SET_ERR_MSG_ATTR(info->extack, + link, "Error fetching BSS for link"); goto free; } @@ -11093,8 +11098,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_elem(WLAN_EID_FRAGMENT, req.links[link_id].elems, req.links[link_id].elems_len)) { - GENL_SET_ERR_MSG(info, - "cannot deal with fragmentation"); + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[NL80211_ATTR_IE], + "cannot deal with fragmentation"); err = -EINVAL; goto free; } @@ -11102,8 +11108,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, req.links[link_id].elems, req.links[link_id].elems_len)) { - GENL_SET_ERR_MSG(info, - "cannot deal with non-inheritance"); + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[NL80211_ATTR_IE], + "cannot deal with non-inheritance"); err = -EINVAL; goto free; } @@ -11146,6 +11153,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { + struct nlattr *link; + int rem = 0; + err = cfg80211_mlme_assoc(rdev, dev, &req); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { @@ -11154,6 +11164,34 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) memcpy(dev->ieee80211_ptr->disconnect_bssid, ap_addr, ETH_ALEN); } + + /* Report error from first problematic link */ + if (info->attrs[NL80211_ATTR_MLO_LINKS]) { + nla_for_each_nested(link, + info->attrs[NL80211_ATTR_MLO_LINKS], + rem) { + struct nlattr *link_id_attr = + nla_find_nested(link, NL80211_ATTR_MLO_LINK_ID); + + if (!link_id_attr) + continue; + + link_id = nla_get_u8(link_id_attr); + + if (link_id == req.link_id) + continue; + + if (!req.links[link_id].error || + WARN_ON(req.links[link_id].error > 0)) + continue; + + WARN_ON(err >= 0); + + NL_SET_BAD_ATTR(info->extack, link); + err = req.links[link_id].error; + break; + } + } } free: -- cgit v1.2.3 From 4aa064484504fba48b3c71321940893ca7cebc84 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Sep 2023 21:25:28 +0300 Subject: wifi: mac80211: report per-link error during association With this cfg80211 can report the link that caused the error to userspace 
which is then able to react to it by e.g. removing the link from the association and retrying. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.275fc7f5c426.I8086c0fdbbf92537d6a8b8e80b33387fcfd5553d@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 663ea7430b73..509ddfd99cbf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7866,8 +7866,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, /* only calculate the flags, hence link == NULL */ err = ieee80211_prep_channel(sdata, NULL, assoc_data->link[i].bss, &assoc_data->link[i].conn_flags); - if (err) + if (err) { + req->links[i].error = err; goto err_clear; + } } /* needed for transmitting the assoc frames properly */ -- cgit v1.2.3 From 1228c749416cb2b0232cbd9beb0c9f1200dfb3c9 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Sep 2023 21:25:29 +0300 Subject: wifi: mac80211: reject MLO channel configuration if not supported Reject configuring a channel for MLO if either EHT is not supported or the BSS does not have the correct ML element. This avoids trying to do a multi-link association with a misconfigured AP. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.80c3b8e5a344.Iaa2d466ee6280994537e1ae7ab9256a27934806f@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 509ddfd99cbf..6a078eb23a5d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4755,6 +4755,7 @@ ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_bss *cbss, + bool mlo, ieee80211_conn_flags_t *conn_flags) { struct ieee80211_local *local = sdata->local; @@ -4768,6 +4769,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; + bool supports_mlo = false; struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_elems_parse_params parse_params = { .link_id = -1, @@ -4921,6 +4923,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ieee80211_mle_type_ok(eht_ml_elem->data + 1, IEEE80211_ML_CONTROL_TYPE_BASIC, eht_ml_elem->datalen - 1)) { + supports_mlo = true; + sdata->vif.cfg.eml_cap = ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1); sdata->vif.cfg.eml_med_sync_delay = @@ -4976,6 +4980,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, return -EINVAL; } + if (mlo && !supports_mlo) { + sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n"); + return -EINVAL; + } + if (!link) return 0; @@ -5124,7 +5133,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; if (link_id != assoc_data->assoc_link_id) { - err = ieee80211_prep_channel(sdata, link, cbss, + err = ieee80211_prep_channel(sdata, link, cbss, true, &link->u.mgd.conn_flags); if (err) { link_info(link, "prep_channel failed\n"); @@ -7159,7 +7168,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data 
*sdata, } if (new_sta || override) { - err = ieee80211_prep_channel(sdata, link, cbss, + err = ieee80211_prep_channel(sdata, link, cbss, mlo, &link->u.mgd.conn_flags); if (err) { if (new_sta) @@ -7864,7 +7873,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (i == assoc_data->assoc_link_id) continue; /* only calculate the flags, hence link == NULL */ - err = ieee80211_prep_channel(sdata, NULL, assoc_data->link[i].bss, + err = ieee80211_prep_channel(sdata, NULL, + assoc_data->link[i].bss, true, &assoc_data->link[i].conn_flags); if (err) { req->links[i].error = err; -- cgit v1.2.3 From e406f291501050e63a805d6b87a0d9bf198fedf9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 17:24:28 +0200 Subject: wifi: cfg80211: add local_state_change to deauth trace Add the local_state_change request to the deauth trace for easier debugging. Signed-off-by: Johannes Berg --- net/wireless/trace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index da2b73951c32..f6667bf3fd12 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1321,16 +1321,18 @@ TRACE_EVENT(rdev_deauth, NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) + __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->bssid); __entry->reason_code = req->reason_code; + __entry->local_state_change = req->local_state_change; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u", + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u, local_state_change:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, - __entry->reason_code) + __entry->reason_code, __entry->local_state_change) ); TRACE_EVENT(rdev_disassoc, -- cgit v1.2.3 From 583058542f46e3e2b0c536316fbd641f62d91dc6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 17:24:39 +0200 Subject: wifi: mac80211: fix check for unusable RX result If we just check "result & RX_DROP_UNUSABLE", this really only works by accident, because SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE got to have the value 1, and SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR is 2. Fix this to really check the entire subsys mask for the value, so it doesn't matter what the subsystem value is. 
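To see why the bitwise test only worked by accident, consider a small standalone sketch; the 16-bit subsystem shift and mask below are assumptions chosen for the illustration, not values taken from the kernel headers:

  #include <stdbool.h>
  #include <stdio.h>

  #define SUBSYS_SHIFT   16
  #define SUBSYS_MASK    (0xffffu << SUBSYS_SHIFT)
  #define UNUSABLE_BASE  (1u << SUBSYS_SHIFT)    /* subsystem value 1 */

  /* old test: any result with bit 16 set looks "unusable" */
  static bool old_check(unsigned int result)
  {
          return result & UNUSABLE_BASE;
  }

  /* new test: compare the whole subsystem field */
  static bool new_check(unsigned int result)
  {
          return (result & SUBSYS_MASK) == UNUSABLE_BASE;
  }

  int main(void)
  {
          /* hypothetical subsystem value 3, arbitrary reason 5 */
          unsigned int result = (3u << SUBSYS_SHIFT) | 5;

          printf("old=%d new=%d\n", old_check(result), new_check(result));
          return 0;
  }

For a hypothetical future subsystem value of 3 the old test matches (bit 16 happens to be set) even though the result is not an UNUSABLE drop, while the masked comparison does not.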
Fixes: 7f4e09700bdc ("wifi: mac80211: report all unusable beacon frames") Signed-off-by: Johannes Berg --- net/mac80211/drop.h | 3 +++ net/mac80211/rx.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 49dc809cab29..1570fac8411f 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -53,4 +53,7 @@ enum mac80211_drop_reason { #undef DEF }; +#define RX_RES_IS_UNUSABLE(result) \ + (((__force u32)(result) & SKB_DROP_REASON_SUBSYS_MASK) == ___RX_DROP_UNUSABLE) + #endif /* MAC80211_DROP_H */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6fcd2a717922..944adc9a51f1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2112,7 +2112,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) && + if (unlikely(ieee80211_is_beacon(fc) && RX_RES_IS_UNUSABLE(result) && rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); -- cgit v1.2.3 From dccc9aa7ee84a9bed7a4840608829eba66f84cb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 17:25:09 +0200 Subject: wifi: mac80211: remove RX_DROP_UNUSABLE Convert all instances of RX_DROP_UNUSABLE to indicate a better reason, and then remove RX_DROP_UNUSABLE. Signed-off-by: Johannes Berg --- net/mac80211/drop.h | 33 ++++++++++++++++++++++++++++++++- net/mac80211/rx.c | 52 ++++++++++++++++++++++++++-------------------------- net/mac80211/wep.c | 9 +++++---- net/mac80211/wpa.c | 42 +++++++++++++++++++++--------------------- 4 files changed, 84 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 1570fac8411f..725a07a5b614 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -21,6 +21,38 @@ typedef unsigned int __bitwise ieee80211_rx_result; R(RX_DROP_U_MIC_FAIL) \ R(RX_DROP_U_REPLAY) \ R(RX_DROP_U_BAD_MMIE) \ + R(RX_DROP_U_DUP) \ + R(RX_DROP_U_SPURIOUS) \ + R(RX_DROP_U_DECRYPT_FAIL) \ + R(RX_DROP_U_NO_KEY_ID) \ + R(RX_DROP_U_BAD_CIPHER) \ + R(RX_DROP_U_OOM) \ + R(RX_DROP_U_NONSEQ_PN) \ + R(RX_DROP_U_BAD_KEY_COLOR) \ + R(RX_DROP_U_BAD_4ADDR) \ + R(RX_DROP_U_BAD_AMSDU) \ + R(RX_DROP_U_BAD_AMSDU_CIPHER) \ + R(RX_DROP_U_INVALID_8023) \ + R(RX_DROP_U_RUNT_ACTION) \ + R(RX_DROP_U_UNPROT_ACTION) \ + R(RX_DROP_U_ACTION_UNKNOWN_SRC) \ + R(RX_DROP_U_REJECTED_ACTION_RESPONSE) \ + R(RX_DROP_U_EXPECT_DEFRAG_PROT) \ + R(RX_DROP_U_WEP_DEC_FAIL) \ + R(RX_DROP_U_NO_IV) \ + R(RX_DROP_U_NO_ICV) \ + R(RX_DROP_U_AP_RX_GROUPCAST) \ + R(RX_DROP_U_SHORT_MMIC) \ + R(RX_DROP_U_MMIC_FAIL) \ + R(RX_DROP_U_SHORT_TKIP) \ + R(RX_DROP_U_TKIP_FAIL) \ + R(RX_DROP_U_SHORT_CCMP) \ + R(RX_DROP_U_SHORT_CCMP_MIC) \ + R(RX_DROP_U_SHORT_GCMP) \ + R(RX_DROP_U_SHORT_GCMP_MIC) \ + R(RX_DROP_U_SHORT_CMAC) \ + R(RX_DROP_U_SHORT_CMAC256) \ + R(RX_DROP_U_SHORT_GMAC) \ /* this line for the trailing \ - add before this */ /* having two enums allows for checking ieee80211_rx_result use with sparse */ @@ -46,7 +78,6 @@ enum mac80211_drop_reason { RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE, RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED, RX_DROP_MONITOR = (__force ieee80211_rx_result)___RX_DROP_MONITOR, - RX_DROP_UNUSABLE = (__force ieee80211_rx_result)___RX_DROP_UNUSABLE, #define DEF(x) x = (__force ieee80211_rx_result)___ ## x, MAC80211_DROP_REASONS_MONITOR(DEF) MAC80211_DROP_REASONS_UNUSABLE(DEF) diff --git 
a/net/mac80211/rx.c b/net/mac80211/rx.c index 944adc9a51f1..1851b86fc5fd 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1436,7 +1436,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); rx->link_sta->rx_stats.num_duplicates++; - return RX_DROP_UNUSABLE; + return RX_DROP_U_DUP; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } @@ -1490,7 +1490,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2, GFP_ATOMIC)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SPURIOUS; return RX_DROP_MONITOR; } @@ -1883,7 +1883,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; - ieee80211_rx_result result = RX_DROP_UNUSABLE; + ieee80211_rx_result result = RX_DROP_U_DECRYPT_FAIL; struct ieee80211_key *sta_ptk = NULL; struct ieee80211_key *ptk_idx = NULL; int mmie_keyidx = -1; @@ -1933,7 +1933,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) keyid = ieee80211_get_keyid(rx->skb); if (unlikely(keyid < 0)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_KEY_ID; ptk_idx = rcu_dereference(rx->sta->ptk[keyid]); } @@ -2038,7 +2038,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) keyidx = ieee80211_get_keyid(rx->skb); if (unlikely(keyidx < 0)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_KEY_ID; /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->link_sta) @@ -2104,7 +2104,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_gcmp_decrypt(rx); break; default: - result = RX_DROP_UNUSABLE; + result = RX_DROP_U_BAD_CIPHER; } /* the hdr variable is invalid after the decrypt handlers */ @@ -2249,7 +2249,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; /* * skb_linearize() might change the skb->data and @@ -2312,11 +2312,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; if (!requires_sequential_pn(rx, fc)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NONSEQ_PN; /* Prevent mixed key and fragment cache attacks */ if (entry->key_color != rx->key->color) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_KEY_COLOR; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { @@ -2327,7 +2327,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rpn = rx->ccm_gcm.pn; if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_REPLAY; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } else if (entry->is_protected && (!rx->key || @@ -2338,11 +2338,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * if for TKIP Michael MIC should protect us, and WEP is a * lost cause anyway. 
*/ - return RX_DROP_UNUSABLE; + return RX_DROP_U_EXPECT_DEFRAG_PROT; } else if (entry->is_protected && rx->key && entry->key_color != rx->key->color && (status->flag & RX_FLAG_DECRYPTED)) { - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_KEY_COLOR; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); @@ -2361,7 +2361,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) GFP_ATOMIC))) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); __skb_queue_purge(&entry->skb_list); - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } } while ((skb = __skb_dequeue(&entry->skb_list))) { @@ -2904,10 +2904,10 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta skb = NULL; if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr))) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (skb_linearize(fwd_skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr)); @@ -3003,7 +3003,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) rx->sdata->vif.addr, rx->sdata->vif.type, data_offset, true)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU; if (rx->sta->amsdu_mesh_control < 0) { s8 valid = -1; @@ -3078,21 +3078,21 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) switch (rx->sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: if (!rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; break; case NL80211_IFTYPE_STATION: if (!rx->sdata->u.mgd.use_4addr) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; break; case NL80211_IFTYPE_MESH_POINT: break; default: - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; } } if (is_multicast_ether_addr(hdr->addr1) || !rx->sta) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU; if (rx->key) { /* @@ -3105,7 +3105,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU_CIPHER; default: break; } @@ -3147,7 +3147,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) err = __ieee80211_data_to_8023(rx, &port_control); if (unlikely(err)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_INVALID_8023; res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); if (res != RX_CONTINUE) @@ -3379,7 +3379,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) /* drop too small action frames */ if (ieee80211_is_action(mgmt->frame_control) && rx->skb->len < IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_UNUSABLE; + return RX_DROP_U_RUNT_ACTION; if (rx->sdata->vif.type == NL80211_IFTYPE_AP && ieee80211_is_beacon(mgmt->frame_control) && @@ -3401,7 +3401,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) } if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_UNPROT_ACTION; return RX_CONTINUE; } @@ -3473,7 +3473,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED && mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) - return RX_DROP_UNUSABLE; + return RX_DROP_U_ACTION_UNKNOWN_SRC; switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: @@ -3878,7 +3878,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) - return RX_DROP_UNUSABLE; + return RX_DROP_U_REJECTED_ACTION_RESPONSE; nskb = skb_copy_expand(rx->skb, 
local->hw.extra_tx_headroom, 0, GFP_ATOMIC); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9a6e11d7b4db..5c01e121481a 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -3,6 +3,7 @@ * Software WEP encryption implementation * Copyright 2002, Jouni Malinen * Copyright 2003, Instant802 Networks, Inc. + * Copyright (C) 2023 Intel Corporation */ #include @@ -250,18 +251,18 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_WEP_DEC_FAIL; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + IEEE80211_WEP_IV_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_IV; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ if (!(status->flag & RX_FLAG_ICV_STRIPPED) && pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_ICV; } return RX_CONTINUE; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 2d8e38b3bcb5..94dae7cb6dbd 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -3,7 +3,7 @@ * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include @@ -142,7 +142,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) * group keys and only the AP is sending real multicast * frames in the BSS. */ - return RX_DROP_UNUSABLE; + return RX_DROP_U_AP_RX_GROUPCAST; } if (status->flag & RX_FLAG_MMIC_ERROR) @@ -150,10 +150,10 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < hdrlen + MICHAEL_MIC_LEN) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_MMIC; if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; hdr = (void *)skb->data; data = skb->data + hdrlen; @@ -188,7 +188,7 @@ mic_fail_no_key: NL80211_KEYTYPE_PAIRWISE, rx->key ? 
rx->key->conf.keyidx : -1, NULL, GFP_ATOMIC); - return RX_DROP_UNUSABLE; + return RX_DROP_U_MMIC_FAIL; } static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) @@ -276,11 +276,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_TKIP; /* it may be possible to optimize this a bit more */ if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; hdr = (void *)skb->data; /* @@ -298,7 +298,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) &rx->tkip.iv32, &rx->tkip.iv16); if (res != TKIP_DECRYPT_OK) - return RX_DROP_UNUSABLE; + return RX_DROP_U_TKIP_FAIL; /* Trim ICV */ if (!(status->flag & RX_FLAG_ICV_STRIPPED)) @@ -523,12 +523,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ @@ -536,7 +536,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; @@ -574,7 +574,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, /* Remove CCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP_MIC; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); @@ -719,12 +719,12 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ @@ -732,7 +732,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; @@ -771,7 +771,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) /* Remove GCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP_MIC; memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_GCMP_HDR_LEN); @@ -924,7 +924,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CMAC; mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); @@ -974,13 +974,13 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CMAC256; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); if (mmie->element_id != WLAN_EID_MMIE || mmie->length != 
sizeof(*mmie) - 2) - return RX_DROP_UNUSABLE; /* Invalid MMIE */ + return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */ bip_ipn_swap(ipn, mmie->sequence_number); @@ -1073,7 +1073,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GMAC; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); @@ -1097,7 +1097,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC); if (!mic) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mic) < 0 || -- cgit v1.2.3 From 6c02fab72429b4950f5d6edd003310d9245e18e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 17:25:10 +0200 Subject: wifi: mac80211: split ieee80211_drop_unencrypted_mgmt() return value This has many different reasons, split the return value into the individual reasons for better traceability. Also, since symbolic tracing doesn't work for these, add a few comments for the numbering. Signed-off-by: Johannes Berg --- net/mac80211/drop.h | 9 +++++++++ net/mac80211/rx.c | 21 +++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 725a07a5b614..3acc21ae9c69 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -18,6 +18,7 @@ typedef unsigned int __bitwise ieee80211_rx_result; /* this line for the trailing \ - add before this */ #define MAC80211_DROP_REASONS_UNUSABLE(R) \ + /* 0x00 == ___RX_DROP_UNUSABLE */ \ R(RX_DROP_U_MIC_FAIL) \ R(RX_DROP_U_REPLAY) \ R(RX_DROP_U_BAD_MMIE) \ @@ -33,8 +34,15 @@ typedef unsigned int __bitwise ieee80211_rx_result; R(RX_DROP_U_BAD_AMSDU) \ R(RX_DROP_U_BAD_AMSDU_CIPHER) \ R(RX_DROP_U_INVALID_8023) \ + /* 0x10 */ \ R(RX_DROP_U_RUNT_ACTION) \ R(RX_DROP_U_UNPROT_ACTION) \ + R(RX_DROP_U_UNPROT_DUAL) \ + R(RX_DROP_U_UNPROT_UCAST_MGMT) \ + R(RX_DROP_U_UNPROT_MCAST_MGMT) \ + R(RX_DROP_U_UNPROT_BEACON) \ + R(RX_DROP_U_UNPROT_UNICAST_PUB_ACTION) \ + R(RX_DROP_U_UNPROT_ROBUST_ACTION) \ R(RX_DROP_U_ACTION_UNKNOWN_SRC) \ R(RX_DROP_U_REJECTED_ACTION_RESPONSE) \ R(RX_DROP_U_EXPECT_DEFRAG_PROT) \ @@ -43,6 +51,7 @@ typedef unsigned int __bitwise ieee80211_rx_result; R(RX_DROP_U_NO_ICV) \ R(RX_DROP_U_AP_RX_GROUPCAST) \ R(RX_DROP_U_SHORT_MMIC) \ + /* 0x20 */ \ R(RX_DROP_U_MMIC_FAIL) \ R(RX_DROP_U_SHORT_TKIP) \ R(RX_DROP_U_TKIP_FAIL) \ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1851b86fc5fd..ff98681c70e3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2416,12 +2416,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) * decrypted them already. */ if (status->flag & RX_FLAG_DECRYPTED) - return 0; + return RX_CONTINUE; /* drop unicast protected dual (that wasn't protected) */ if (ieee80211_is_action(fc) && mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) - return -EACCES; + return RX_DROP_U_UNPROT_DUAL; if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && @@ -2433,13 +2433,13 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) * during 4-way-HS (key is installed after HS). 
*/ if (!rx->key) - return 0; + return RX_CONTINUE; cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); } - return -EACCES; + return RX_DROP_U_UNPROT_UCAST_MGMT; } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && @@ -2449,14 +2449,14 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); - return -EACCES; + return RX_DROP_U_UNPROT_MCAST_MGMT; } if (unlikely(ieee80211_is_beacon(fc) && rx->key && ieee80211_get_mmie_keyidx(rx->skb) < 0)) { cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); - return -EACCES; + return RX_DROP_U_UNPROT_BEACON; } /* * When using MFP, Action frames are not allowed prior to @@ -2464,13 +2464,13 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) */ if (unlikely(ieee80211_is_action(fc) && !rx->key && ieee80211_is_robust_mgmt_frame(rx->skb))) - return -EACCES; + return RX_DROP_U_UNPROT_ACTION; /* drop unicast public action frames when using MPF */ if (is_unicast_ether_addr(mgmt->da) && ieee80211_is_public_action((void *)rx->skb->data, rx->skb->len)) - return -EACCES; + return RX_DROP_U_UNPROT_UNICAST_PUB_ACTION; } return 0; @@ -3400,10 +3400,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) rx->flags |= IEEE80211_RX_BEACON_REPORTED; } - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_U_UNPROT_ACTION; - - return RX_CONTINUE; + return ieee80211_drop_unencrypted_mgmt(rx); } static bool -- cgit v1.2.3 From 2a1c5c7de468801d414dcb4410aba32c3ee7207b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Sep 2023 17:25:11 +0200 Subject: wifi: mac80211: expand __ieee80211_data_to_8023() status Make __ieee80211_data_to_8023() return more individual drop reasons instead of just doing RX_DROP_U_INVALID_8023. 
Signed-off-by: Johannes Berg --- net/mac80211/drop.h | 4 ++++ net/mac80211/rx.c | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 3acc21ae9c69..12a6f0e9eca6 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -62,6 +62,10 @@ typedef unsigned int __bitwise ieee80211_rx_result; R(RX_DROP_U_SHORT_CMAC) \ R(RX_DROP_U_SHORT_CMAC256) \ R(RX_DROP_U_SHORT_GMAC) \ + R(RX_DROP_U_UNEXPECTED_VLAN_4ADDR) \ + R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \ + R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \ + R(RX_DROP_U_NOT_PORT_CONTROL) \ /* this line for the trailing \ - add before this */ /* having two enums allows for checking ieee80211_rx_result use with sparse */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ff98681c70e3..fb2d4a7436be 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2476,7 +2476,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return 0; } -static int +static ieee80211_rx_result __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control) { struct ieee80211_sub_if_data *sdata = rx->sdata; @@ -2488,32 +2488,31 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control) *port_control = false; if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) - return -1; + return RX_DROP_U_UNEXPECTED_VLAN_4ADDR; if (sdata->vif.type == NL80211_IFTYPE_STATION && !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) { - if (!sdata->u.mgd.use_4addr) - return -1; + return RX_DROP_U_UNEXPECTED_STA_4ADDR; else if (!ether_addr_equal(hdr->addr1, sdata->vif.addr)) check_port_control = true; } if (is_multicast_ether_addr(hdr->addr1) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) - return -1; + return RX_DROP_U_UNEXPECTED_VLAN_MCAST; ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); if (ret < 0) - return ret; + return RX_DROP_U_INVALID_8023; ehdr = (struct ethhdr *) rx->skb->data; if (ehdr->h_proto == rx->sdata->control_port_protocol) *port_control = true; else if (check_port_control) - return -1; + return RX_DROP_U_NOT_PORT_CONTROL; - return 0; + return RX_CONTINUE; } bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata, @@ -3124,7 +3123,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; ieee80211_rx_result res; bool port_control; - int err; if (unlikely(!ieee80211_is_data(hdr->frame_control))) return RX_CONTINUE; @@ -3145,9 +3143,9 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } - err = __ieee80211_data_to_8023(rx, &port_control); - if (unlikely(err)) - return RX_DROP_U_INVALID_8023; + res = __ieee80211_data_to_8023(rx, &port_control); + if (unlikely(res != RX_CONTINUE)) + return res; res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); if (res != RX_CONTINUE) -- cgit v1.2.3 From 057708a9ca5930d4d9a456c29010f4f90ae760b7 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Wed, 20 Sep 2023 20:56:57 +0800 Subject: pktgen: Automate flag enumeration for unknown flag handling When specifying an unknown flag, it will print all available flags. Currently, these flags are provided as fixed strings, which requires manual updates when flags change. Replacing it with automated flag enumeration. 
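For reference, the new output can be exercised from user space roughly like this (the interface name is an example and must already have been added to a pktgen thread):

  echo "flag BOGUS" > /proc/net/pktgen/eth0
  cat /proc/net/pktgen/eth0

The Result: line of the read-back then reports the unknown flag together with the automatically enumerated list of available flags.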
Signed-off-by: Liang Chen Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20230920125658.46978-1-liangchen.linux@gmail.com Signed-off-by: Paolo Abeni --- net/core/pktgen.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f56b8d697014..48306a101fd9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1318,9 +1318,10 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "flag")) { + bool disable = false; __u32 flag; char f[32]; - bool disable = false; + char *end; memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); @@ -1332,28 +1333,33 @@ static ssize_t pktgen_if_write(struct file *file, i += len; flag = pktgen_read_flag(f, &disable); - if (flag) { if (disable) pkt_dev->flags &= ~flag; else pkt_dev->flags |= flag; - } else { - sprintf(pg_result, - "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", - f, - "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, " - "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, " - "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, " - "NO_TIMESTAMP, " -#ifdef CONFIG_XFRM - "IPSEC, " -#endif - "NODE_ALLOC\n"); + + sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); return count; } - sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); + + /* Unknown flag */ + end = pkt_dev->result + sizeof(pkt_dev->result); + pg_result += sprintf(pg_result, + "Flag -:%s:- unknown\n" + "Available flags, (prepend ! to un-set flag):\n", f); + + for (int n = 0; n < NR_PKT_FLAGS && pg_result < end; n++) { + if (!IS_ENABLED(CONFIG_XFRM) && n == IPSEC_SHIFT) + continue; + pg_result += snprintf(pg_result, end - pg_result, + "%s, ", pkt_flag_names[n]); + } + if (!WARN_ON_ONCE(pg_result >= end)) { + /* Remove the comma and whitespace at the end */ + *(pg_result - 2) = '\0'; + } + return count; } if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { -- cgit v1.2.3 From 7c7dd1d64910d07ab36b858d53d00e89b6d918d6 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Wed, 20 Sep 2023 20:56:58 +0800 Subject: pktgen: Introducing 'SHARED' flag for testing with non-shared skb Currently, skbs generated by pktgen always have their reference count incremented before transmission, causing their reference count to be always greater than 1, leading to two issues: 1. Only the code paths for shared skbs can be tested. 2. In certain situations, skbs can only be released by pktgen. To enhance testing comprehensiveness, we are introducing the "SHARED" flag to indicate whether an SKB is shared. This flag is enabled by default, aligning with the current behavior. However, disabling this flag allows skbs with a reference count of 1 to be transmitted. So we can test non-shared skbs and code paths where skbs are released within the stack. 
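A minimal usage sketch (interface name illustrative; clone_skb and burst are left at their defaults, since those still require a shared skb):

  echo "add_device eth0" > /proc/net/pktgen/kpktgend_0
  echo "count 10000"     > /proc/net/pktgen/eth0
  echo "flag !SHARED"    > /proc/net/pktgen/eth0
  echo "start"           > /proc/net/pktgen/pgctrl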
Signed-off-by: Liang Chen Reviewed-by: Benjamin Poirier Link: https://lore.kernel.org/r/20230920125658.46978-2-liangchen.linux@gmail.com Signed-off-by: Paolo Abeni --- Documentation/networking/pktgen.rst | 12 +++++++ net/core/pktgen.c | 64 +++++++++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/Documentation/networking/pktgen.rst b/Documentation/networking/pktgen.rst index 1225f0f63ff0..c945218946e1 100644 --- a/Documentation/networking/pktgen.rst +++ b/Documentation/networking/pktgen.rst @@ -178,6 +178,7 @@ Examples:: IPSEC # IPsec encapsulation (needs CONFIG_XFRM) NODE_ALLOC # node specific memory allocation NO_TIMESTAMP # disable timestamping + SHARED # enable shared SKB pgset 'flag ![name]' Clear a flag to determine behaviour. Note that you might need to use single quote in interactive mode, so that your shell wouldn't expand @@ -288,6 +289,16 @@ To avoid breaking existing testbed scripts for using AH type and tunnel mode, you can use "pgset spi SPI_VALUE" to specify which transformation mode to employ. +Disable shared SKB +================== +By default, SKBs sent by pktgen are shared (user count > 1). +To test with non-shared SKBs, remove the "SHARED" flag by simply setting:: + + pg_set "flag !SHARED" + +However, if the "clone_skb" or "burst" parameters are configured, the skb +still needs to be held by pktgen for further access. Hence the skb must be +shared. Current commands and configuration options ========================================== @@ -357,6 +368,7 @@ Current commands and configuration options IPSEC NODE_ALLOC NO_TIMESTAMP + SHARED spi (ipsec) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 48306a101fd9..5e865af82e5b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -200,6 +200,7 @@ pf(VID_RND) /* Random VLAN ID */ \ pf(SVID_RND) /* Random SVLAN ID */ \ pf(NODE) /* Node memory alloc*/ \ + pf(SHARED) /* Shared SKB */ \ #define pf(flag) flag##_SHIFT, enum pkt_flags { @@ -1198,7 +1199,8 @@ static ssize_t pktgen_if_write(struct file *file, ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; - if (value > 0 && pkt_dev->n_imix_entries > 0) + if (value > 0 && (pkt_dev->n_imix_entries > 0 || + !(pkt_dev->flags & F_SHARED))) return -EINVAL; i += len; @@ -1257,6 +1259,10 @@ static ssize_t pktgen_if_write(struct file *file, ((pkt_dev->xmit_mode == M_START_XMIT) && (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) return -ENOTSUPP; + + if (value > 1 && !(pkt_dev->flags & F_SHARED)) + return -EINVAL; + pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%u", pkt_dev->burst); return count; @@ -1334,10 +1340,19 @@ static ssize_t pktgen_if_write(struct file *file, flag = pktgen_read_flag(f, &disable); if (flag) { - if (disable) + if (disable) { + /* If "clone_skb", or "burst" parameters are + * configured, it means that the skb still + * needs to be referenced by the pktgen, so + * the skb must be shared. 
+ */ + if (flag == F_SHARED && (pkt_dev->clone_skb || + pkt_dev->burst > 1)) + return -EINVAL; pkt_dev->flags &= ~flag; - else + } else { pkt_dev->flags |= flag; + } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); return count; @@ -3446,12 +3461,24 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { - unsigned int burst = READ_ONCE(pkt_dev->burst); + bool skb_shared = !!(READ_ONCE(pkt_dev->flags) & F_SHARED); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; + unsigned int burst = 1; struct sk_buff *skb; + int clone_skb = 0; int ret; + /* If 'skb_shared' is false, the read of possible + * new values (if any) for 'burst' and 'clone_skb' will be skipped to + * prevent some concurrent changes from slipping in. And the stabilized + * config will be read in during the next run of pktgen_xmit. + */ + if (skb_shared) { + burst = READ_ONCE(pkt_dev->burst); + clone_skb = READ_ONCE(pkt_dev->clone_skb); + } + /* If device is offline, then don't send */ if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) { pktgen_stop_device(pkt_dev); @@ -3468,7 +3495,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* If no skb or clone count exhausted then get new one */ if (!pkt_dev->skb || (pkt_dev->last_ok && - ++pkt_dev->clone_count >= pkt_dev->clone_skb)) { + ++pkt_dev->clone_count >= clone_skb)) { /* build a new pkt */ kfree_skb(pkt_dev->skb); @@ -3489,7 +3516,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { skb = pkt_dev->skb; skb->protocol = eth_type_trans(skb, skb->dev); - refcount_add(burst, &skb->users); + if (skb_shared) + refcount_add(burst, &skb->users); local_bh_disable(); do { ret = netif_receive_skb(skb); @@ -3497,6 +3525,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->errors++; pkt_dev->sofar++; pkt_dev->seq_num++; + if (unlikely(!skb_shared)) { + pkt_dev->skb = NULL; + break; + } if (refcount_read(&skb->users) != burst) { /* skb was queued by rps/rfs or taps, * so cannot reuse this skb @@ -3515,9 +3547,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { local_bh_disable(); - refcount_inc(&pkt_dev->skb->users); + if (skb_shared) + refcount_inc(&pkt_dev->skb->users); ret = dev_queue_xmit(pkt_dev->skb); + + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; + switch (ret) { case NET_XMIT_SUCCESS: pkt_dev->sofar++; @@ -3555,11 +3592,15 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - refcount_add(burst, &pkt_dev->skb->users); + if (skb_shared) + refcount_add(burst, &pkt_dev->skb->users); xmit_more: ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; + switch (ret) { case NETDEV_TX_OK: pkt_dev->last_ok = 1; @@ -3581,7 +3622,8 @@ xmit_more: fallthrough; case NETDEV_TX_BUSY: /* Retry it next time */ - refcount_dec(&(pkt_dev->skb->users)); + if (skb_shared) + refcount_dec(&pkt_dev->skb->users); pkt_dev->last_ok = 0; } if (unlikely(burst)) @@ -3594,7 +3636,8 @@ out: /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - pktgen_wait_for_skb(pkt_dev); + if (pkt_dev->skb) + pktgen_wait_for_skb(pkt_dev); /* Done with this */ pktgen_stop_device(pkt_dev); @@ -3777,6 +3820,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const 
char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->burst = 1; pkt_dev->node = NUMA_NO_NODE; + pkt_dev->flags = F_SHARED; /* SKB shared by default */ err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) -- cgit v1.2.3 From e27c3295114bb6a6dc6d58a38f8503c0ea97aa6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Aug 2023 11:52:40 +0200 Subject: netfilter: nf_nat: undo erroneous tcp edemux lookup after port clash In commit 03a3ca37e4c6 ("netfilter: nf_nat: undo erroneous tcp edemux lookup") I fixed a problem with source port clash resolution and DNAT. A very similar issue exists with REDIRECT (DNAT to local address) and port rewrites. Consider two port redirections done at prerouting hook: -p tcp --port 1111 -j REDIRECT --to-ports 80 -p tcp --port 1112 -j REDIRECT --to-ports 80 Its possible, however unlikely, that we get two connections sharing the same source port, i.e. saddr:12345 -> daddr:1111 saddr:12345 -> daddr:1112 This works on sender side because destination address is different. After prerouting, nat will change first syn packet to saddr:12345 -> daddr:80, stack will send a syn-ack back and 3whs completes. The second syn however will result in a source port clash: after dnat rewrite, new syn has saddr:12345 -> daddr:80 This collides with the reply direction of the first connection. The NAT engine will handle this in the input nat hook by also altering the source port, so we get for example saddr:13535 -> daddr:80 This allows the stack to send back a syn-ack to that address. Reverse NAT during POSTROUTING will rewrite the packet to daddr:1112 -> saddr:12345 again. Tuple will be unique on-wire and peer can process it normally. Problem is when ACK packet comes in: After prerouting, packet payload is mangled to saddr:12345 -> daddr:80. Early demux will assign the 3whs-completing ACK skb to the first connections' established socket. This will then elicit a challenge ack from the first connections' socket rather than complete the connection of the second. The second connection can never complete. Detect this condition by checking if the associated sockets port matches the conntrack entries reply tuple. If it doesn't, then input source address translation mangled payload after early demux and the found sk is incorrect. Discard this sk and let TCP stack do another lookup. 
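Written out as rules, the setup described above would look like this (ports illustrative, iptables syntax as one possible frontend):

  iptables -t nat -A PREROUTING -p tcp --dport 1111 -j REDIRECT --to-ports 80
  iptables -t nat -A PREROUTING -p tcp --dport 1112 -j REDIRECT --to-ports 80

A client that reuses the same source port for connections to ports 1111 and 1112 then has both connections rewritten to local port 80, producing the clash described above.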
Signed-off-by: Florian Westphal --- net/netfilter/nf_nat_proto.c | 64 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 48cc60084d28..5a049740758f 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int } #endif +static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport) +{ + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + const struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return false; + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + return false; + } + + dir = CTINFO2DIR(ctinfo); + if (dir != IP_CT_DIR_ORIGINAL) + return false; + + return ct->tuplehash[!dir].tuple.dst.u.all != sport; +} + static unsigned int nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, ret = nf_nat_ipv4_fn(priv, skb, state); - if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && - !inet_sk_transparent(sk)) + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* skb has a socket assigned via tcp edemux. We need to check + * if nf_nat_ipv4_fn() has mangled the packet in a way that + * edemux would not have found this socket. + * + * This includes both changes to the source address and changes + * to the source port, which are both handled by the + * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux + * might have found a socket for the old (pre-snat) address. + */ + if (saddr != ip_hdr(skb)->saddr || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) skb_orphan(skb); /* TCP edemux obtained wrong socket */ return ret; @@ -937,6 +974,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, return nf_nat_inet_fn(priv, skb, state); } +static unsigned int +nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr = ipv6_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv6_fn(priv, skb, state); + + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* see nf_nat_ipv4_local_in */ + if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) + skb_orphan(skb); + + return ret; +} + static unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -1051,7 +1109,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv6_fn, + .hook = nf_nat_ipv6_local_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, -- cgit v1.2.3 From aee1f692bfeda9e5c3a40cbc165d80de0ffb0879 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2023 19:11:17 +0200 Subject: netfilter: nf_tables: missing extended netlink error in lookup functions Set netlink extended error reporting for several lookup functions which allows userspace to infer what is the error cause. 
Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4356189360fb..f993c237afd0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4695,8 +4695,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, return -EINVAL; set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) @@ -6025,8 +6027,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -6919,8 +6923,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb, set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET], nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (!list_empty(&set->bindings) && (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) @@ -7195,8 +7201,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (nft_set_is_anonymous(set)) return -EOPNOTSUPP; @@ -8680,6 +8688,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { + struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; @@ -8705,13 +8714,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb, table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, 0); - if (IS_ERR(table)) + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); + } flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], genmask); - if (IS_ERR(flowtable)) + if (IS_ERR(flowtable)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return PTR_ERR(flowtable); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) -- cgit v1.2.3 From 013714bf3e125a218bb02c938ff6df348dda743e Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 1 Sep 2023 14:16:15 +0200 Subject: netfilter: nf_tables: Utilize NLA_POLICY_NESTED_ARRAY Mark attributes which are supposed to be arrays of nested attributes with known content as such. Originally suggested for NFTA_RULE_EXPRESSIONS only, but does apply to others as well. 
Suggested-by: Florian Westphal Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f993c237afd0..7e2e76086d25 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3316,7 +3316,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, - [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, @@ -4254,12 +4254,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_SET_HANDLE] = { .type = NLA_U64 }, [NFTA_SET_EXPR] = { .type = NLA_NESTED }, - [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), +}; + +static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { + [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, - [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED }, + [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy), }; static struct nft_set *nft_set_lookup(const struct nft_table *table, @@ -4715,10 +4719,6 @@ err_fill_set_info: return err; } -static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { - [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, -}; - static int nft_set_desc_concat_parse(const struct nlattr *attr, struct nft_set_desc *desc) { @@ -5500,7 +5500,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -5508,7 +5508,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy), [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; -- cgit v1.2.3 From c419d884551fa4f000996ace22ab498415afad2e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Sep 2023 16:21:00 +0200 Subject: wifi: mac80211: fix ieee80211_drop_unencrypted_mgmt return type/value Somehow, I managed to botch this and pretty much completely break wifi. My original patch did contain these changes, but I seem to have lost them before sending to the list. Fix it now. 
Reported-and-tested-by: Kalle Valo Fixes: 6c02fab72429 ("wifi: mac80211: split ieee80211_drop_unencrypted_mgmt() return value") Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fb2d4a7436be..051db97a92b4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2405,7 +2405,8 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) return 0; } -static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) +static ieee80211_rx_result +ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; @@ -2473,7 +2474,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_U_UNPROT_UNICAST_PUB_ACTION; } - return 0; + return RX_CONTINUE; } static ieee80211_rx_result -- cgit v1.2.3 From aa75cc029e053627743fba2cde8a73519abe8421 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Sep 2023 15:58:14 +0200 Subject: wifi: mac80211: add back SPDX identifier Looks like I lost that by accident, add it back. Fixes: 076fc8775daf ("wifi: cfg80211: remove wdev mutex") Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6a078eb23a5d..2ac36ad9fa91 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * BSS client mode implementation * Copyright 2003-2008, Jouni Malinen -- cgit v1.2.3 From bbf80d713fe75cfbecda26e7c03a9a8d22af2f4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Sep 2023 17:29:43 +0000 Subject: tcp: derive delack_max from rto_min While BPF allows to set icsk->->icsk_delack_max and/or icsk->icsk_rto_min, we have an ip route attribute (RTAX_RTO_MIN) to be able to tune rto_min, but nothing to consequently adjust max delayed ack, which vary from 40ms to 200 ms (TCP_DELACK_{MIN|MAX}). This makes RTAX_RTO_MIN of almost no practical use, unless customers are in big trouble. Modern days datacenter communications want to set rto_min to ~5 ms, and the max delayed ack one jiffie smaller to avoid spurious retransmits. After this patch, an "rto_min 5" route attribute will effectively lower max delayed ack timers to 4 ms. Note in the following ss output, "rto:6 ... ato:4" $ ss -temoi dst XXXXXX State Recv-Q Send-Q Local Address:Port Peer Address:Port Process ESTAB 0 0 [2002:a05:6608:295::]:52950 [2002:a05:6608:297::]:41597 ino:255134 sk:1001 <-> skmem:(r0,rb1707063,t872,tb262144,f0,w0,o0,bl0,d0) ts sack cubic wscale:8,8 rto:6 rtt:0.02/0.002 ato:4 mss:4096 pmtu:4500 rcvmss:536 advmss:4096 cwnd:10 bytes_sent:54823160 bytes_acked:54823121 bytes_received:54823120 segs_out:1370582 segs_in:1370580 data_segs_out:1370579 data_segs_in:1370578 send 16.4Gbps pacing_rate 32.6Gbps delivery_rate 1.72Gbps delivered:1370579 busy:26920ms unacked:1 rcv_rtt:34.615 rcv_space:65920 rcv_ssthresh:65535 minrtt:0.015 snd_wnd:65536 While we could argue this patch fixes a bug with RTAX_RTO_MIN, I do not add a Fixes: tag, so that we can soak it a bit before asking backports to stable branches. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_output.c | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8db7d43fb62..af9cb37fbe53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -718,6 +718,8 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +u32 tcp_delack_max(const struct sock *sk); + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(const struct sock *sk) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 69b8d7073708..e54f91eb943b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3762,7 +3762,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); - info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); + info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, + tcp_delack_max(sk))); info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1fc1f879cfd6..2d1e4b5ac1ca 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3977,6 +3977,20 @@ int tcp_connect(struct sock *sk) } EXPORT_SYMBOL(tcp_connect); +u32 tcp_delack_max(const struct sock *sk) +{ + const struct dst_entry *dst = __sk_dst_get(sk); + u32 delack_max = inet_csk(sk)->icsk_delack_max; + + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) { + u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); + u32 delack_from_rto_min = max_t(int, 1, rto_min - 1); + + delack_max = min_t(u32, delack_max, delack_from_rto_min); + } + return delack_max; +} + /* Send out a delayed ack, the caller does the policy checking * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() * for details. @@ -4012,7 +4026,7 @@ void tcp_send_delayed_ack(struct sock *sk) ato = min(ato, max_ato); } - ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max); + ato = min_t(u32, ato, tcp_delack_max(sk)); /* Stay within the limit we were given */ timeout = jiffies + ato; -- cgit v1.2.3 From 54ff8ad69c6e93c0767451ae170b41c000e565dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Sep 2023 20:17:12 +0000 Subject: net_sched: sch_fq: struct sched_data reorg q->flows can be often modified, and q->timer_slack is read mostly. Exchange the two fields, so that cache line countaining quantum, initial_quantum, and other critical parameters stay clean (read-mostly). Move q->watchdog next to q->stat_throttled Add comments explaining how the structure is split in three different parts. 
pahole output before the patch: struct fq_sched_data { struct fq_flow_head new_flows; /* 0 0x10 */ struct fq_flow_head old_flows; /* 0x10 0x10 */ struct rb_root delayed; /* 0x20 0x8 */ u64 time_next_delayed_flow; /* 0x28 0x8 */ u64 ktime_cache; /* 0x30 0x8 */ unsigned long unthrottle_latency_ns; /* 0x38 0x8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct fq_flow internal __attribute__((__aligned__(64))); /* 0x40 0x80 */ /* XXX last struct has 16 bytes of padding */ /* --- cacheline 3 boundary (192 bytes) --- */ u32 quantum; /* 0xc0 0x4 */ u32 initial_quantum; /* 0xc4 0x4 */ u32 flow_refill_delay; /* 0xc8 0x4 */ u32 flow_plimit; /* 0xcc 0x4 */ unsigned long flow_max_rate; /* 0xd0 0x8 */ u64 ce_threshold; /* 0xd8 0x8 */ u64 horizon; /* 0xe0 0x8 */ u32 orphan_mask; /* 0xe8 0x4 */ u32 low_rate_threshold; /* 0xec 0x4 */ struct rb_root * fq_root; /* 0xf0 0x8 */ u8 rate_enable; /* 0xf8 0x1 */ u8 fq_trees_log; /* 0xf9 0x1 */ u8 horizon_drop; /* 0xfa 0x1 */ /* XXX 1 byte hole, try to pack */ u32 flows; /* 0xfc 0x4 */ /* --- cacheline 4 boundary (256 bytes) --- */ u32 inactive_flows; /* 0x100 0x4 */ u32 throttled_flows; /* 0x104 0x4 */ u64 stat_gc_flows; /* 0x108 0x8 */ u64 stat_internal_packets; /* 0x110 0x8 */ u64 stat_throttled; /* 0x118 0x8 */ u64 stat_ce_mark; /* 0x120 0x8 */ u64 stat_horizon_drops; /* 0x128 0x8 */ u64 stat_horizon_caps; /* 0x130 0x8 */ u64 stat_flows_plimit; /* 0x138 0x8 */ /* --- cacheline 5 boundary (320 bytes) --- */ u64 stat_pkts_too_long; /* 0x140 0x8 */ u64 stat_allocation_errors; /* 0x148 0x8 */ u32 timer_slack; /* 0x150 0x4 */ /* XXX 4 bytes hole, try to pack */ struct qdisc_watchdog watchdog; /* 0x158 0x48 */ /* size: 448, cachelines: 7, members: 34 */ /* sum members: 411, holes: 2, sum holes: 5 */ /* padding: 32 */ /* paddings: 1, sum paddings: 16 */ /* forced alignments: 1 */ }; pahole output after the patch: struct fq_sched_data { struct fq_flow_head new_flows; /* 0 0x10 */ struct fq_flow_head old_flows; /* 0x10 0x10 */ struct rb_root delayed; /* 0x20 0x8 */ u64 time_next_delayed_flow; /* 0x28 0x8 */ u64 ktime_cache; /* 0x30 0x8 */ unsigned long unthrottle_latency_ns; /* 0x38 0x8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct fq_flow internal __attribute__((__aligned__(64))); /* 0x40 0x80 */ /* XXX last struct has 16 bytes of padding */ /* --- cacheline 3 boundary (192 bytes) --- */ u32 quantum; /* 0xc0 0x4 */ u32 initial_quantum; /* 0xc4 0x4 */ u32 flow_refill_delay; /* 0xc8 0x4 */ u32 flow_plimit; /* 0xcc 0x4 */ unsigned long flow_max_rate; /* 0xd0 0x8 */ u64 ce_threshold; /* 0xd8 0x8 */ u64 horizon; /* 0xe0 0x8 */ u32 orphan_mask; /* 0xe8 0x4 */ u32 low_rate_threshold; /* 0xec 0x4 */ struct rb_root * fq_root; /* 0xf0 0x8 */ u8 rate_enable; /* 0xf8 0x1 */ u8 fq_trees_log; /* 0xf9 0x1 */ u8 horizon_drop; /* 0xfa 0x1 */ /* XXX 1 byte hole, try to pack */ u32 timer_slack; /* 0xfc 0x4 */ /* --- cacheline 4 boundary (256 bytes) --- */ u32 flows; /* 0x100 0x4 */ u32 inactive_flows; /* 0x104 0x4 */ u32 throttled_flows; /* 0x108 0x4 */ /* XXX 4 bytes hole, try to pack */ u64 stat_throttled; /* 0x110 0x8 */ struct qdisc_watchdog watchdog; /* 0x118 0x48 */ /* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */ u64 stat_gc_flows; /* 0x160 0x8 */ u64 stat_internal_packets; /* 0x168 0x8 */ u64 stat_ce_mark; /* 0x170 0x8 */ u64 stat_horizon_drops; /* 0x178 0x8 */ /* --- cacheline 6 boundary (384 bytes) --- */ u64 stat_horizon_caps; /* 0x180 0x8 */ u64 stat_flows_plimit; /* 0x188 0x8 */ u64 stat_pkts_too_long; /* 0x190 0x8 */ u64 
stat_allocation_errors; /* 0x198 0x8 */ /* Force padding: */ u64 :64; u64 :64; u64 :64; u64 :64; /* size: 448, cachelines: 7, members: 34 */ /* sum members: 411, holes: 2, sum holes: 5 */ /* padding: 32 */ /* paddings: 1, sum paddings: 16 */ /* forced alignments: 1 */ }; Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f59a2cb2c803..230300aac3ed 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -104,6 +104,9 @@ struct fq_sched_data { unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ + +/* Read mostly cache line */ + u32 quantum; u32 initial_quantum; u32 flow_refill_delay; @@ -117,22 +120,27 @@ struct fq_sched_data { u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; + u32 timer_slack; /* hrtimer slack in ns */ + +/* Read/Write fields. */ + u32 flows; u32 inactive_flows; u32 throttled_flows; + u64 stat_throttled; + struct qdisc_watchdog watchdog; u64 stat_gc_flows; + +/* Seldom used fields. */ + u64 stat_internal_packets; - u64 stat_throttled; u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; - - u32 timer_slack; /* hrtimer slack in ns */ - struct qdisc_watchdog watchdog; }; /* -- cgit v1.2.3 From ee9af4e14d166c34ec78896bb1ba545249445df0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Sep 2023 20:17:13 +0000 Subject: net_sched: sch_fq: change how @inactive is tracked Currently, when one fq qdisc has no more packets to send, it can still have some flows stored in its RR lists (q->new_flows & q->old_flows) This was a design choice, but what is a bit disturbing is that the inactive_flows counter does not include the count of empty flows in RR lists. As next patch needs to know better if there are active flows, this change makes inactive_flows exact. Before the patch, following command on an empty qdisc could have returned: lpaa17:~# tc -s -d qd sh dev eth1 | grep inactive flows 1322 (inactive 1316 throttled 0) flows 1330 (inactive 1325 throttled 0) flows 1193 (inactive 1190 throttled 0) flows 1208 (inactive 1202 throttled 0) After the patch, we now have: lpaa17:~# tc -s -d qd sh dev eth1 | grep inactive flows 1322 (inactive 1322 throttled 0) flows 1330 (inactive 1330 throttled 0) flows 1193 (inactive 1193 throttled 0) flows 1208 (inactive 1208 throttled 0) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 230300aac3ed..4af43a401dbb 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -125,7 +125,7 @@ struct fq_sched_data { /* Read/Write fields. */ u32 flows; - u32 inactive_flows; + u32 inactive_flows; /* Flows with no packet to send. 
*/ u32 throttled_flows; u64 stat_throttled; @@ -402,9 +402,12 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, struct sk_buff *skb) { + struct fq_sched_data *q = qdisc_priv(sch); + fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); - flow->qlen--; + if (--flow->qlen == 0) + q->inactive_flows++; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } @@ -484,13 +487,13 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } - f->qlen++; + if (f->qlen++ == 0) + q->inactive_flows--; qdisc_qstats_backlog_inc(sch, skb); if (fq_flow_is_detached(f)) { fq_flow_add_tail(&q->new_flows, f); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); - q->inactive_flows--; } /* Note: this overwrites f->age */ @@ -597,7 +600,6 @@ begin: fq_flow_add_tail(&q->old_flows, f); } else { fq_flow_set_detached(f); - q->inactive_flows++; } goto begin; } -- cgit v1.2.3 From 076433bd78d719b34d465c1e69eef512036b534c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Sep 2023 20:17:14 +0000 Subject: net_sched: sch_fq: add fast path for mostly idle qdisc TCQ_F_CAN_BYPASS can be used by few qdiscs. Idea is that if we queue a packet to an empty qdisc, following dequeue() would pick it immediately. FQ can not use the generic TCQ_F_CAN_BYPASS code, because some additional checks need to be performed. This patch adds a similar fast path to FQ. Most of the time, qdisc is not throttled, and many packets can avoid bringing/touching at least four cache lines, and consuming 128bytes of memory to store the state of a flow. After this patch, netperf can send UDP packets about 13 % faster, and pktgen goes 30 % faster (when FQ is in the way), on a fast NIC. TCP traffic is also improved, thanks to a reduction of cache line misses. I have measured a 5 % increase of throughput on a tcp_rr intensive workload. tc -s -d qd sh dev eth1 ... qdisc fq 8004: parent 1:2 limit 10000p flow_limit 100p buckets 1024 orphan_mask 1023 quantum 3028b initial_quantum 15140b low_rate_threshold 550Kbit refill_delay 40ms timer_slack 10us horizon 10s horizon_drop Sent 5646784384 bytes 1985161 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 flows 122 (inactive 122 throttled 0) gc 0 highprio 0 fastpath 659990 throttled 27762 latency 8.57us Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_fq.c | 128 +++++++++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 3f85ae578056..579f641846b8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -962,6 +962,7 @@ struct tc_fq_qd_stats { __u64 ce_mark; /* packets above ce_threshold */ __u64 horizon_drops; __u64 horizon_caps; + __u64 fastpath_packets; }; /* Heavy-Hitter Filter */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4af43a401dbb..5cf3b50a24d5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -2,7 +2,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013-2015 Eric Dumazet + * Copyright (C) 2013-2023 Eric Dumazet * * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. 
@@ -73,7 +73,13 @@ struct fq_flow { struct sk_buff *tail; /* last skb in the list */ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; - struct rb_node fq_node; /* anchor in fq_root[] trees */ + union { + struct rb_node fq_node; /* anchor in fq_root[] trees */ + /* Following field is only used for q->internal, + * because q->internal is not hashed in fq_root[] + */ + u64 stat_fastpath_packets; + }; struct sock *sk; u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ @@ -134,7 +140,7 @@ struct fq_sched_data { /* Seldom used fields. */ - u64 stat_internal_packets; + u64 stat_internal_packets; /* aka highprio */ u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; @@ -266,17 +272,64 @@ static void fq_gc(struct fq_sched_data *q, kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree); } -static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) +/* Fast path can be used if : + * 1) Packet tstamp is in the past. + * 2) FQ qlen == 0 OR + * (no flow is currently eligible for transmit, + * AND fast path queue has less than 8 packets) + * 3) No SO_MAX_PACING_RATE on the socket (if any). + * 4) No @maxrate attribute on this qdisc, + * + * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure. + */ +static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb) +{ + const struct fq_sched_data *q = qdisc_priv(sch); + const struct sock *sk; + + if (fq_skb_cb(skb)->time_to_send > q->ktime_cache) + return false; + + if (sch->q.qlen != 0) { + /* Even if some packets are stored in this qdisc, + * we can still enable fast path if all of them are + * scheduled in the future (ie no flows are eligible) + * or in the fast path queue. + */ + if (q->flows != q->inactive_flows + q->throttled_flows) + return false; + + /* Do not allow fast path queue to explode, we want Fair Queue mode + * under pressure. + */ + if (q->internal.qlen >= 8) + return false; + } + + sk = skb->sk; + if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) && + sk->sk_max_pacing_rate != ~0UL) + return false; + + if (q->flow_max_rate != ~0UL) + return false; + + return true; +} + +static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb) { + struct fq_sched_data *q = qdisc_priv(sch); struct rb_node **p, *parent; struct sock *sk = skb->sk; struct rb_root *root; struct fq_flow *f; /* warning: no starvation prevention... 
*/ - if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) + if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) { + q->stat_internal_packets++; /* highprio packet */ return &q->internal; - + } /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket * or a listener (SYNCOOKIE mode) * 1) request sockets are not full blown, @@ -307,6 +360,11 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) sk = (struct sock *)((hash << 1) | 1UL); } + if (fq_fastpath_check(sch, skb)) { + q->internal.stat_fastpath_packets++; + return &q->internal; + } + root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; if (q->flows >= (2U << q->fq_trees_log) && @@ -402,12 +460,8 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, struct sk_buff *skb) { - struct fq_sched_data *q = qdisc_priv(sch); - fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); - if (--flow->qlen == 0) - q->inactive_flows++; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } @@ -459,49 +513,45 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); + q->ktime_cache = ktime_get_ns(); if (!skb->tstamp) { - fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); + fq_skb_cb(skb)->time_to_send = q->ktime_cache; } else { - /* Check if packet timestamp is too far in the future. - * Try first if our cached value, to avoid ktime_get_ns() - * cost in most cases. - */ + /* Check if packet timestamp is too far in the future. */ if (fq_packet_beyond_horizon(skb, q)) { - /* Refresh our cache and check another time */ - q->ktime_cache = ktime_get_ns(); - if (fq_packet_beyond_horizon(skb, q)) { - if (q->horizon_drop) { + if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop(skb, sch, to_free); - } - q->stat_horizon_caps++; - skb->tstamp = q->ktime_cache + q->horizon; } + q->stat_horizon_caps++; + skb->tstamp = q->ktime_cache + q->horizon; } fq_skb_cb(skb)->time_to_send = skb->tstamp; } - f = fq_classify(skb, q); - if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { - q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); - } + f = fq_classify(sch, skb); - if (f->qlen++ == 0) - q->inactive_flows--; - qdisc_qstats_backlog_inc(sch, skb); - if (fq_flow_is_detached(f)) { - fq_flow_add_tail(&q->new_flows, f); - if (time_after(jiffies, f->age + q->flow_refill_delay)) - f->credit = max_t(u32, f->credit, q->quantum); + if (f != &q->internal) { + if (unlikely(f->qlen >= q->flow_plimit)) { + q->stat_flows_plimit++; + return qdisc_drop(skb, sch, to_free); + } + + if (fq_flow_is_detached(f)) { + fq_flow_add_tail(&q->new_flows, f); + if (time_after(jiffies, f->age + q->flow_refill_delay)) + f->credit = max_t(u32, f->credit, q->quantum); + } + + if (f->qlen == 0) + q->inactive_flows--; } + f->qlen++; /* Note: this overwrites f->age */ flow_queue_add(f, skb); - if (unlikely(f == &q->internal)) { - q->stat_internal_packets++; - } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -549,6 +599,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) skb = fq_peek(&q->internal); if (unlikely(skb)) { + q->internal.qlen--; fq_dequeue_skb(sch, &q->internal, skb); goto out; } @@ -592,6 +643,8 @@ begin: INET_ECN_set_ce(skb); q->stat_ce_mark++; } + if (--f->qlen == 0) + q->inactive_flows++; fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; @@ -1024,6 +1077,7 @@ 
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.gc_flows = q->stat_gc_flows; st.highprio_packets = q->stat_internal_packets; + st.fastpath_packets = q->internal.stat_fastpath_packets; st.tcp_retrans = 0; st.throttled = q->stat_throttled; st.flows_plimit = q->stat_flows_plimit; -- cgit v1.2.3 From 8f6c4ff9e0522da9313fbff5295ae208af679fed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Sep 2023 20:17:15 +0000 Subject: net_sched: sch_fq: always garbage collect FQ performs garbage collection at enqueue time, and only if number of flows is above a given threshold, which is hit after the qdisc has been used a bit. Since an RB-tree traversal is needed to locate a flow, it makes sense to perform gc all the time, to keep rb-trees smaller. This reduces by 50 % average storage costs in FQ, and avoids 1 cache line miss at enqueue time when fast path added in prior patch can not be used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 5cf3b50a24d5..681bbf34b707 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -367,9 +367,7 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb) root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; - if (q->flows >= (2U << q->fq_trees_log) && - q->inactive_flows > q->flows/2) - fq_gc(q, root, sk); + fq_gc(q, root, sk); p = &root->rb_node; parent = NULL; -- cgit v1.2.3 From a56d9390bd6045e8c37bcfd0586ff5f65ef22997 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 08:52:18 +0000 Subject: net: l2tp_eth: use generic dev->stats fields Core networking has opt-in atomic variant of dev->stats, simply use DEV_STATS_INC(), DEV_STATS_ADD() and DEV_STATS_READ(). v2: removed @priv local var in l2tp_eth_dev_recv() (Simon) Signed-off-by: Eric Dumazet Cc: Simon Horman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_eth.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f2ae03c40473..25ca89f80414 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -37,12 +37,6 @@ /* via netdev_priv() */ struct l2tp_eth { struct l2tp_session *session; - atomic_long_t tx_bytes; - atomic_long_t tx_packets; - atomic_long_t tx_dropped; - atomic_long_t rx_bytes; - atomic_long_t rx_packets; - atomic_long_t rx_errors; }; /* via l2tp_session_priv() */ @@ -79,10 +73,10 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev int ret = l2tp_xmit_skb(session, skb); if (likely(ret == NET_XMIT_SUCCESS)) { - atomic_long_add(len, &priv->tx_bytes); - atomic_long_inc(&priv->tx_packets); + DEV_STATS_ADD(dev, tx_bytes, len); + DEV_STATS_INC(dev, tx_packets); } else { - atomic_long_inc(&priv->tx_dropped); + DEV_STATS_INC(dev, tx_dropped); } return NETDEV_TX_OK; } @@ -90,14 +84,12 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev static void l2tp_eth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct l2tp_eth *priv = netdev_priv(dev); - - stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes); - stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets); - stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped); - stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes); - stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets); - stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors); + stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes); + stats->tx_packets = DEV_STATS_READ(dev, tx_packets); + stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); + stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes); + stats->rx_packets = DEV_STATS_READ(dev, rx_packets); + stats->rx_errors = DEV_STATS_READ(dev, rx_errors); } static const struct net_device_ops l2tp_eth_netdev_ops = { @@ -126,7 +118,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, { struct l2tp_eth_sess *spriv = l2tp_session_priv(session); struct net_device *dev; - struct l2tp_eth *priv; if (!pskb_may_pull(skb, ETH_HLEN)) goto error; @@ -144,12 +135,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, if (!dev) goto error_rcu; - priv = netdev_priv(dev); if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { - atomic_long_inc(&priv->rx_packets); - atomic_long_add(data_len, &priv->rx_bytes); + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, data_len); } else { - atomic_long_inc(&priv->rx_errors); + DEV_STATS_INC(dev, rx_errors); } rcu_read_unlock(); -- cgit v1.2.3 From 06bc3668cc2a6db2831b9086f0e3c6ebda599dba Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 21 Sep 2023 21:42:35 +0200 Subject: openvswitch: reduce stack usage in do_execute_actions do_execute_actions() function can be called recursively multiple times while executing actions that require pipeline forking or recirculations. It may also be re-entered multiple times if the packet leaves openvswitch module and re-enters it through a different port. Currently, there is a 256-byte array allocated on stack in this function that is supposed to hold NSH header. 
Compilers tend to pre-allocate that space right at the beginning of the function: a88: 48 81 ec b0 01 00 00 sub $0x1b0,%rsp NSH is not a very common protocol, but the space is allocated on every recursive call or re-entry multiplying the wasted stack space. Move the stack allocation to push_nsh() function that is only used if NSH actions are actually present. push_nsh() is also a simple function without a possibility for re-entry, so the stack is returned right away. With this change the preallocated space is reduced by 256 B per call: b18: 48 81 ec b0 00 00 00 sub $0xb0,%rsp Signed-off-by: Ilya Maximets Reviewed-by: Eric Dumazet Reviewed-by: Eelco Chaudron echaudro@redhat.com Reviewed-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 5f8094acd056..6fcd7e2ca81f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -311,11 +311,18 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, - const struct nshhdr *nh) +static noinline_for_stack int push_nsh(struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *a) { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; int err; + err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN); + if (err) + return err; + err = nsh_push(skb, nh); if (err) return err; @@ -1439,17 +1446,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_eth(skb, key); break; - case OVS_ACTION_ATTR_PUSH_NSH: { - u8 buffer[NSH_HDR_MAX_LEN]; - struct nshhdr *nh = (struct nshhdr *)buffer; - - err = nsh_hdr_from_nlattr(nla_data(a), nh, - NSH_HDR_MAX_LEN); - if (unlikely(err)) - break; - err = push_nsh(skb, key, nh); + case OVS_ACTION_ATTR_PUSH_NSH: + err = push_nsh(skb, key, nla_data(a)); break; - } case OVS_ACTION_ATTR_POP_NSH: err = pop_nsh(skb, key); -- cgit v1.2.3 From 10bbf1652c1cca9819e98d56f3432c56d7a2d229 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:11 +0000 Subject: net: implement lockless SO_PRIORITY This is a followup of 8bf43be799d4 ("net: annotate data-races around sk->sk_priority"). sk->sk_priority can be read and written without holding the socket lock. Signed-off-by: Eric Dumazet Reviewed-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- drivers/net/ppp/pppoe.c | 2 +- include/net/bluetooth/bluetooth.h | 2 +- net/appletalk/aarp.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/can/j1939/socket.c | 2 +- net/can/raw.c | 2 +- net/core/sock.c | 23 ++++++++++++----------- net/dccp/ipv6.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/mptcp/sockopt.c | 2 +- net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sched/em_meta.c | 2 +- net/sctp/ipv6.c | 2 +- net/smc/af_smc.c | 2 +- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 2 +- 24 files changed, 36 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index ba8b6bd8233c..8e7238e97d0a 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -877,7 +877,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index aa90adc3b2a4..7ffa8c192c3f 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, return ERR_PTR(-EFAULT); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); return skb; } diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c7236daa2415..9fa0b246902b 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -664,7 +664,7 @@ out_unlock: sendit: if (skb->sk) - skb->priority = skb->sk->sk_priority; + skb->priority = READ_ONCE(skb->sk->sk_priority); if (dev_queue_xmit(skb)) goto drop; sent: diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5db805d5f74d..558e158c98d0 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3bdfc3f1e73d..e50d3d102078 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, return ERR_PTR(-ENOTCONN); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); bt_cb(skb)->l2cap.chan = chan; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b28c976f52a0..14c431663233 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -884,7 +884,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, skcb = j1939_skb_to_cb(skb); memset(skcb, 0, sizeof(*skcb)); skcb->addr = jsk->addr; - skcb->priority = j1939_prio(sk->sk_priority); + skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority)); if (msg->msg_name) { struct sockaddr_can *addr = msg->msg_name; diff --git a/net/can/raw.c b/net/can/raw.c index d50c3f3d892f..73468d2ebd51 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -881,7 +881,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr 
*msg, size_t size) } skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; diff --git a/net/core/sock.c b/net/core/sock.c index a5995750c5c5..1fdc0a0d8ff2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -806,9 +806,7 @@ EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { - lock_sock(sk); WRITE_ONCE(sk->sk_priority, priority); - release_sock(sk); } EXPORT_SYMBOL(sock_set_priority); @@ -1118,6 +1116,18 @@ int sk_setsockopt(struct sock *sk, int level, int optname, valbool = val ? 1 : 0; + /* handle options which do not require locking the socket. */ + switch (optname) { + case SO_PRIORITY: + if ((val >= 0 && val <= 6) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + sock_set_priority(sk, val); + return 0; + } + return -EPERM; + } + sockopt_lock_sock(sk); switch (optname) { @@ -1213,15 +1223,6 @@ set_sndbuf: sk->sk_no_check_tx = valbool; break; - case SO_PRIORITY: - if ((val >= 0 && val <= 6) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - WRITE_ONCE(sk->sk_priority, val); - else - ret = -EPERM; - break; - case SO_LINGER: if (optlen < sizeof(ling)) { ret = -EINVAL; /* 1003.1g */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 80b956b39252..8d344b219f84 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -239,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e13a84433413..9f0bd518901a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, * For cgroup2 classid is always zero. */ if (!classid) - classid = sk->sk_priority; + classid = READ_ONCE(sk->sk_priority); if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4ab877cf6d35..6b14097e80ad 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1449,7 +1449,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt); } - skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; + skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f13eb7e23d03..95e972be0c05 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -828,7 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_priority : sk->sk_priority; + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); xfrm_sk_clone_policy(ctl_sk, sk); txhash = (sk->sk_state == TCP_TIME_WAIT) ? 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index eee8ab1bfa0e..3f87611077ef 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -292,7 +292,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; - tw->tw_priority = sk->sk_priority; + tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c50dcd35fe8..80043e46117c 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 951ba8089b5b..cdaa9275e990 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1984,7 +1984,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94afb8d0f2d0..8a6e2e97f673 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -565,7 +565,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), - opt, tclass, sk->sk_priority); + opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1058,7 +1058,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) trace_tcp_send_reset(sk, skb); if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); - priority = sk->sk_priority; + priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8260202c0066..f3485a6b35e7 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); break; case SO_PRIORITY: - ssk->sk_priority = val; + WRITE_ONCE(ssk->sk_priority, val); break; case SO_SNDBUF: case SO_SNDBUFFORCE: diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 96e91ab71573..0eed00184adf 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 49dafe9ac72f..0cc5a4e19900 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -583,7 +583,7 @@ static struct sock *rose_make_new(struct sock *osk) #endif sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; 
sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index da34fd4c9269..09d8afd04a2a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio) *err = -1; return; } - dst->value = sk->sk_priority; + dst->value = READ_ONCE(sk->sk_priority); } META_COLLECTOR(int_sk_rcvlowat) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 5c0ed5909d85..24368f755ab1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -247,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) rcu_read_lock(); res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), - tclass, sk->sk_priority); + tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bacdd971615e..297681601414 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo; nsk->sk_mark = READ_ONCE(osk->sk_mark); - nsk->sk_priority = osk->sk_priority; + nsk->sk_priority = READ_ONCE(osk->sk_priority); nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_bound_dev_if = osk->sk_bound_dev_if; nsk->sk_err = osk->sk_err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0fb5143bec7a..aad8ffeaee04 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk) x25 = x25_sk(sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7482d0aca504..f5e96e0d6e01 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -684,7 +684,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } skb->dev = dev; - skb->priority = xs->sk.sk_priority; + skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; xsk_set_destructor_arg(skb); -- cgit v1.2.3 From 8ebfb6db5a01f16cd37254bfad7145204e4bf6f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:12 +0000 Subject: net: lockless SO_PASSCRED, SO_PASSPIDFD and SO_PASSSEC sock->flags are atomic, no need to hold the socket lock in sk_setsockopt() for SO_PASSCRED, SO_PASSPIDFD and SO_PASSSEC. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 1fdc0a0d8ff2..f01c75724568 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1126,6 +1126,15 @@ int sk_setsockopt(struct sock *sk, int level, int optname, return 0; } return -EPERM; + case SO_PASSSEC: + assign_bit(SOCK_PASSSEC, &sock->flags, valbool); + return 0; + case SO_PASSCRED: + assign_bit(SOCK_PASSCRED, &sock->flags, valbool); + return 0; + case SO_PASSPIDFD: + assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); + return 0; } sockopt_lock_sock(sk); @@ -1248,14 +1257,6 @@ set_sndbuf: case SO_BSDCOMPAT: break; - case SO_PASSCRED: - assign_bit(SOCK_PASSCRED, &sock->flags, valbool); - break; - - case SO_PASSPIDFD: - assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); - break; - case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: @@ -1361,9 +1362,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); break; - case SO_PASSSEC: - assign_bit(SOCK_PASSSEC, &sock->flags, valbool); - break; case SO_MARK: if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { -- cgit v1.2.3 From b120251590a9c771bae353e444503fa49793c75e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:13 +0000 Subject: net: lockless SO_{TYPE|PROTOCOL|DOMAIN|ERROR } setsockopt() This options can not be set and return -ENOPROTOOPT, no need to acqure socket lock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index f01c75724568..4d20b74a93cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1135,6 +1135,11 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_PASSPIDFD: assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); return 0; + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + return -ENOPROTOOPT; } sockopt_lock_sock(sk); @@ -1152,12 +1157,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_REUSEPORT: sk->sk_reuseport = valbool; break; - case SO_TYPE: - case SO_PROTOCOL: - case SO_DOMAIN: - case SO_ERROR: - ret = -ENOPROTOOPT; - break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); sk_dst_reset(sk); -- cgit v1.2.3 From 2a4319cf3c83fc5d1997466196b99b3e14584e76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:14 +0000 Subject: net: lockless implementation of SO_BUSY_POLL, SO_PREFER_BUSY_POLL, SO_BUSY_POLL_BUDGET Setting sk->sk_ll_usec, sk_prefer_busy_poll and sk_busy_poll_budget do not require the socket lock, readers are lockless anyway. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 4d20b74a93cb..408081549bd7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1140,6 +1140,26 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_DOMAIN: case SO_ERROR: return -ENOPROTOOPT; +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: + if (val < 0) + return -EINVAL; + WRITE_ONCE(sk->sk_ll_usec, val); + return 0; + case SO_PREFER_BUSY_POLL: + if (valbool && !sockopt_capable(CAP_NET_ADMIN)) + return -EPERM; + WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); + return 0; + case SO_BUSY_POLL_BUDGET: + if (val > READ_ONCE(sk->sk_busy_poll_budget) && + !sockopt_capable(CAP_NET_ADMIN)) + return -EPERM; + if (val < 0 || val > U16_MAX) + return -EINVAL; + WRITE_ONCE(sk->sk_busy_poll_budget, val); + return 0; +#endif } sockopt_lock_sock(sk); @@ -1402,30 +1422,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_RX_BUSY_POLL - case SO_BUSY_POLL: - if (val < 0) - ret = -EINVAL; - else - WRITE_ONCE(sk->sk_ll_usec, val); - break; - case SO_PREFER_BUSY_POLL: - if (valbool && !sockopt_capable(CAP_NET_ADMIN)) - ret = -EPERM; - else - WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); - break; - case SO_BUSY_POLL_BUDGET: - if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) { - ret = -EPERM; - } else { - if (val < 0 || val > U16_MAX) - ret = -EINVAL; - else - WRITE_ONCE(sk->sk_busy_poll_budget, val); - } - break; -#endif case SO_MAX_PACING_RATE: { -- cgit v1.2.3 From 28b24f90020fed8e8e3e8e20575f08c1cd06e54f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:15 +0000 Subject: net: implement lockless SO_MAX_PACING_RATE SO_MAX_PACING_RATE setsockopt() does not need to hold the socket lock, because sk->sk_pacing_rate readers can run fine if the value is changed by other threads, after adding READ_ONCE() accessors. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/trace/events/mptcp.h | 2 +- net/core/sock.c | 40 +++++++++++++++++++++------------------- net/ipv4/tcp_bbr.c | 13 +++++++------ net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 9 +++++---- net/sched/sch_fq.c | 2 +- 6 files changed, 37 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 563e48617374..09e72215b9f9 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -44,7 +44,7 @@ TRACE_EVENT(mptcp_subflow_get_send, ssk = mptcp_subflow_tcp_sock(subflow); if (ssk && sk_fullsock(ssk)) { __entry->snd_wnd = tcp_sk(ssk)->snd_wnd; - __entry->pace = ssk->sk_pacing_rate; + __entry->pace = READ_ONCE(ssk->sk_pacing_rate); } else { __entry->snd_wnd = 0; __entry->pace = 0; diff --git a/net/core/sock.c b/net/core/sock.c index 408081549bd7..4254ed0e4817 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1160,6 +1160,27 @@ int sk_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(sk->sk_busy_poll_budget, val); return 0; #endif + case SO_MAX_PACING_RATE: + { + unsigned long ulval = (val == ~0U) ? 
~0UL : (unsigned int)val; + unsigned long pacing_rate; + + if (sizeof(ulval) != sizeof(val) && + optlen >= sizeof(ulval) && + copy_from_sockptr(&ulval, optval, sizeof(ulval))) { + return -EFAULT; + } + if (ulval != ~0UL) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); + /* Pairs with READ_ONCE() from sk_getsockopt() */ + WRITE_ONCE(sk->sk_max_pacing_rate, ulval); + pacing_rate = READ_ONCE(sk->sk_pacing_rate); + if (ulval < pacing_rate) + WRITE_ONCE(sk->sk_pacing_rate, ulval); + return 0; + } } sockopt_lock_sock(sk); @@ -1423,25 +1444,6 @@ set_sndbuf: break; - case SO_MAX_PACING_RATE: - { - unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; - - if (sizeof(ulval) != sizeof(val) && - optlen >= sizeof(ulval) && - copy_from_sockptr(&ulval, optval, sizeof(ulval))) { - ret = -EFAULT; - break; - } - if (ulval != ~0UL) - cmpxchg(&sk->sk_pacing_status, - SK_PACING_NONE, - SK_PACING_NEEDED); - /* Pairs with READ_ONCE() from sk_getsockopt() */ - WRITE_ONCE(sk->sk_max_pacing_rate, ulval); - sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); - break; - } case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); break; diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 146792cd26fe..22358032dd48 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -258,7 +258,7 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) u64 rate = bw; rate = bbr_rate_bytes_per_sec(sk, rate, gain); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); + rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); return rate; } @@ -278,7 +278,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) } bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); - sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); + WRITE_ONCE(sk->sk_pacing_rate, + bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); } /* Pace using current bw estimate and a gain factor. */ @@ -290,14 +291,14 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) bbr_init_pacing_rate_from_rtt(sk); - if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) - sk->sk_pacing_rate = rate; + if (bbr_full_bw_reached(sk) || rate > READ_ONCE(sk->sk_pacing_rate)) + WRITE_ONCE(sk->sk_pacing_rate, rate); } /* override sysctl_tcp_min_tso_segs */ __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) { - return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; + return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; } static u32 bbr_tso_segs_goal(struct sock *sk) @@ -309,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk) * driver provided sk_gso_max_size. */ bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 584825ddd0a0..22c2a7c2e65e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -927,8 +927,8 @@ static void tcp_update_pacing_rate(struct sock *sk) * without any lock. We want to make sure compiler wont store * intermediate values in this location. */ - WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, - sk->sk_max_pacing_rate)); + WRITE_ONCE(sk->sk_pacing_rate, + min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate))); } /* Calculate rto without backoff. 
This is the second half of Van Jacobson's diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d1e4b5ac1ca..970a07289480 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1201,7 +1201,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); if (sk->sk_pacing_status != SK_PACING_NONE) { - unsigned long rate = sk->sk_pacing_rate; + unsigned long rate = READ_ONCE(sk->sk_pacing_rate); /* Original sch_fq does not pace first 10 MSS * Note that tp->data_segs_out overflows after 2^32 packets, @@ -1973,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, unsigned long bytes; u32 r; - bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) @@ -2553,7 +2553,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); @@ -2561,7 +2561,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, if (static_branch_unlikely(&tcp_tx_delay_enabled) && tcp_sk(sk)->tcp_tx_delay) { - u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) * + tcp_sk(sk)->tcp_tx_delay; /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we * approximate our needs assuming an ~100% skb->truesize overhead. diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 681bbf34b707..d35419db7b94 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -668,7 +668,7 @@ begin: */ if (!skb->tstamp) { if (skb->sk) - rate = min(skb->sk->sk_pacing_rate, rate); + rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate); if (rate <= q->low_rate_threshold) { f->credit = 0; -- cgit v1.2.3 From 5eef0b8de1be40c5d05873b7e3d63824300c9f39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:16 +0000 Subject: net: lockless implementation of SO_TXREHASH sk->sk_txrehash readers are already safe against concurrent change of this field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 4254ed0e4817..f0930f858714 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1181,6 +1181,16 @@ int sk_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(sk->sk_pacing_rate, ulval); return 0; } + case SO_TXREHASH: + if (val < -1 || val > 1) + return -EINVAL; + if ((u8)val == SOCK_TXREHASH_DEFAULT) + val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); + /* Paired with READ_ONCE() in tcp_rtx_synack() + * and sk_getsockopt(). + */ + WRITE_ONCE(sk->sk_txrehash, (u8)val); + return 0; } sockopt_lock_sock(sk); @@ -1528,19 +1538,6 @@ set_sndbuf: break; } - case SO_TXREHASH: - if (val < -1 || val > 1) { - ret = -EINVAL; - break; - } - if ((u8)val == SOCK_TXREHASH_DEFAULT) - val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - /* Paired with READ_ONCE() in tcp_rtx_synack() - * and sk_getsockopt(). 
- */ - WRITE_ONCE(sk->sk_txrehash, (u8)val); - break; - default: ret = -ENOPROTOOPT; break; -- cgit v1.2.3 From eb44ad4e635132754bfbcb18103f1dcb7058aedd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:18 +0000 Subject: net: annotate data-races around sk->sk_dst_pending_confirm This field can be read or written without socket lock being held. Add annotations to avoid load-store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/ipv4/tcp_output.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index a50e37af2b57..01f0005cb7d8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2182,7 +2182,7 @@ static inline void __dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } @@ -2199,7 +2199,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); @@ -2212,7 +2212,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } diff --git a/net/core/sock.c b/net/core/sock.c index f0930f858714..290165954379 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -600,7 +600,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 970a07289480..8885552dff8e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1325,7 +1325,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm)); /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; -- cgit v1.2.3 From c9746e6a19c24b2d9a74d6657daee3b39fdc1bec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:14 +0000 Subject: inet: implement lockless IP_MULTICAST_TTL inet->mc_ttl can be read locklessly. Implement proper lockless reads and writes to inet->mc_ttl Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 2 +- net/ipv4/ip_sockglue.c | 31 ++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6b14097e80ad..f07ce051760d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1430,7 +1430,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, if (cork->ttl != 0) ttl = cork->ttl; else if (rt->rt_type == RTN_MULTICAST) - ttl = inet->mc_ttl; + ttl = READ_ONCE(inet->mc_ttl); else ttl = ip_select_ttl(inet, &rt->dst); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cce9cb25f3b3..4ad3003378ae 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1039,6 +1039,17 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(inet->min_ttl, val); return 0; + case IP_MULTICAST_TTL: + if (sk->sk_type == SOCK_STREAM) + return -EINVAL; + if (optlen < 1) + return -EINVAL; + if (val == -1) + val = 1; + if (val < 0 || val > 255) + return -EINVAL; + WRITE_ONCE(inet->mc_ttl, val); + return 0; } err = 0; @@ -1101,17 +1112,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, goto e_inval; inet->pmtudisc = val; break; - case IP_MULTICAST_TTL: - if (sk->sk_type == SOCK_STREAM) - goto e_inval; - if (optlen < 1) - goto e_inval; - if (val == -1) - val = 1; - if (val < 0 || val > 255) - goto e_inval; - inet->mc_ttl = val; - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -1592,6 +1592,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MINTTL: val = READ_ONCE(inet->min_ttl); goto copyval; + case IP_MULTICAST_TTL: + val = READ_ONCE(inet->mc_ttl); + goto copyval; } if (needs_rtnl) @@ -1649,9 +1652,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, } break; } - case IP_MULTICAST_TTL: - val = inet->mc_ttl; - break; case IP_UNICAST_IF: val = (__force int)htonl((__u32) inet->uc_index); break; @@ -1718,7 +1718,8 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } if (inet_test_bit(TTL, sk)) { - int hlim = inet->mc_ttl; + int hlim = READ_ONCE(inet->mc_ttl); + put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } if (inet_test_bit(TOS, sk)) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 5820a8156c47..3eed16702248 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1316,7 +1316,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); - inet->mc_ttl = ttl; + WRITE_ONCE(inet->mc_ttl, ttl); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); -- cgit v1.2.3 From ceaa714138a372ac63cc2c5c19ee0882d22827f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:15 +0000 Subject: inet: implement lockless IP_MTU_DISCOVER inet->pmtudisc can be read locklessly. Implement proper lockless reads and writes to inet->pmtudisc ip_sock_set_mtu_discover() can now be called from arbitrary contexts. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ip.h | 13 ++++++++----- net/ipv4/ip_output.c | 7 ++++--- net/ipv4/ip_sockglue.c | 17 ++++++----------- net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 7 files changed, 22 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 3489a1cca5e7..46933a0d98ea 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -434,19 +434,22 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) static inline bool ip_sk_accept_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && - inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc != IP_PMTUDISC_INTERFACE && + pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; + return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || - inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f07ce051760d..9fc7be2c2033 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1387,8 +1387,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, struct ip_options *opt = NULL; struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; + u8 pmtudisc, ttl; __be16 df = 0; - __u8 ttl; skb = __skb_dequeue(queue); if (!skb) @@ -1418,8 +1418,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk, /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. 
*/ - if (inet->pmtudisc == IP_PMTUDISC_DO || - inet->pmtudisc == IP_PMTUDISC_PROBE || + pmtudisc = READ_ONCE(inet->pmtudisc); + if (pmtudisc == IP_PMTUDISC_DO || + pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4ad3003378ae..6d874cc03c8b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -622,9 +622,7 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val) { if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) return -EINVAL; - lock_sock(sk); - inet_sk(sk)->pmtudisc = val; - release_sock(sk); + WRITE_ONCE(inet_sk(sk)->pmtudisc, val); return 0; } EXPORT_SYMBOL(ip_sock_set_mtu_discover); @@ -1050,6 +1048,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(inet->mc_ttl, val); return 0; + case IP_MTU_DISCOVER: + return ip_sock_set_mtu_discover(sk, val); } err = 0; @@ -1107,11 +1107,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, case IP_TOS: /* This sets both TOS and Precedence */ __ip_sock_set_tos(sk, val); break; - case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) - goto e_inval; - inet->pmtudisc = val; - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -1595,6 +1590,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MULTICAST_TTL: val = READ_ONCE(inet->mc_ttl); goto copyval; + case IP_MTU_DISCOVER: + val = READ_ONCE(inet->pmtudisc); + goto copyval; } if (needs_rtnl) @@ -1634,9 +1632,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TOS: val = inet->tos; break; - case IP_MTU_DISCOVER: - val = inet->pmtudisc; - break; case IP_MTU: { struct dst_entry *dst; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4dd809b7b188..50d12b0c8d46 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -551,7 +551,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b5db5d1edc2..ade1aecd7c71 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -239,7 +239,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (code > NR_ICMP_UNREACH) break; if (code == ICMP_FRAG_NEEDED) { - harderr = inet->pmtudisc != IP_PMTUDISC_DONT; + harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT; err = EMSGSIZE; } else { err = icmp_err_convert[code].errno; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ff984b6354..731a723dc808 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -750,7 +750,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3eed16702248..4f6c795588fb 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1335,7 +1335,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ lock_sock(sk); - 
inet->pmtudisc = val; + WRITE_ONCE(inet->pmtudisc, val); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); -- cgit v1.2.3 From e08d0b3d172311e2bb500865c0d0038533d0ff11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:16 +0000 Subject: inet: implement lockless IP_TOS Some reads of inet->tos are racy. Add needed READ_ONCE() annotations and convert IP_TOS option lockless. v2: missing changes in include/net/route.h (David Ahern) Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- include/net/route.h | 4 +-- net/dccp/ipv4.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/ip_output.c | 4 +-- net/ipv4/ip_sockglue.c | 29 +++++++++------------- net/ipv4/tcp_ipv4.c | 9 ++++--- net/mptcp/sockopt.c | 8 +++--- net/sctp/protocol.c | 4 +-- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 10 files changed, 31 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 46933a0d98ea..6fbc0dcf4b97 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -258,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); + return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); } /* datagram.c */ @@ -810,6 +810,5 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); -void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/include/net/route.h b/include/net/route.h index 51a45b1887b5..5c248a8e3d0e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -37,7 +37,7 @@ #define RTO_ONLINK 0x01 -#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) +#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) static inline __u8 ip_sock_rt_scope(const struct sock *sk) @@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(inet_sk(sk)->tos); + return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); } struct ip_tunnel_info; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 69453b936bd5..1b8cbfda6e5d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -511,7 +511,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt), - inet_sk(sk)->tos); + READ_ONCE(inet_sk(sk)->tos)); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9f0bd518901a..f01aee832aab 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, * hence this needs to be included regardless of socket family. 
*/ if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0) goto errout; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9fc7be2c2033..89e62ed08dad 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(__ip_queue_xmit); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos); + return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos)); } EXPORT_SYMBOL(ip_queue_xmit); @@ -1438,7 +1438,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; - iph->tos = (cork->tos != -1) ? cork->tos : inet->tos; + iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos); iph->frag_off = df; iph->ttl = ttl; iph->protocol = sk->sk_protocol; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6d874cc03c8b..50c008efbb6d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -585,25 +585,20 @@ out: return err; } -void __ip_sock_set_tos(struct sock *sk, int val) +void ip_sock_set_tos(struct sock *sk, int val) { + u8 old_tos = READ_ONCE(inet_sk(sk)->tos); + if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; - val |= inet_sk(sk)->tos & INET_ECN_MASK; + val |= old_tos & INET_ECN_MASK; } - if (inet_sk(sk)->tos != val) { - inet_sk(sk)->tos = val; + if (old_tos != val) { + WRITE_ONCE(inet_sk(sk)->tos, val); WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); sk_dst_reset(sk); } } - -void ip_sock_set_tos(struct sock *sk, int val) -{ - lock_sock(sk); - __ip_sock_set_tos(sk, val); - release_sock(sk); -} EXPORT_SYMBOL(ip_sock_set_tos); void ip_sock_set_freebind(struct sock *sk) @@ -1050,6 +1045,9 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, return 0; case IP_MTU_DISCOVER: return ip_sock_set_mtu_discover(sk, val); + case IP_TOS: /* This sets both TOS and Precedence */ + ip_sock_set_tos(sk, val); + return 0; } err = 0; @@ -1104,9 +1102,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, } } break; - case IP_TOS: /* This sets both TOS and Precedence */ - __ip_sock_set_tos(sk, val); - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -1593,6 +1588,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MTU_DISCOVER: val = READ_ONCE(inet->pmtudisc); goto copyval; + case IP_TOS: + val = READ_ONCE(inet->tos); + goto copyval; } if (needs_rtnl) @@ -1629,9 +1627,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } - case IP_TOS: - val = inet->tos; - break; case IP_MTU: { struct dst_entry *dst; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 95e972be0c05..a441740616d7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1024,10 +1024,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
- (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | - (inet_sk(sk)->tos & INET_ECN_MASK) : - inet_sk(sk)->tos; + tos = READ_ONCE(inet_sk(sk)->tos); + + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (tos & INET_ECN_MASK); if (!INET_ECN_is_capable(tos) && tcp_bpf_ca_needs_ecn((struct sock *)req)) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f3485a6b35e7..18ce624bfde2 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -734,11 +734,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, lock_sock(sk); sockopt_seq_inc(msk); - val = inet_sk(sk)->tos; + val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __ip_sock_set_tos(ssk, val); + ip_sock_set_tos(ssk, val); } release_sock(sk); @@ -1343,7 +1343,7 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: - return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); + return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); } return -EOPNOTSUPP; @@ -1411,7 +1411,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; ssk->sk_ipv6only = sk->sk_ipv6only; - __ip_sock_set_tos(ssk, inet_sk(sk)->tos); + ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2185f44198de..94c6dd53cd62 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -426,7 +426,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; - __u8 tos = inet_sk(sk)->tos; + u8 tos = READ_ONCE(inet_sk(sk)->tos); if (t->dscp & SCTP_DSCP_SET_MASK) tos = t->dscp & SCTP_DSCP_VAL_MASK; @@ -1057,7 +1057,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) struct flowi4 *fl4 = &t->fl.u.ip4; struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - __u8 dscp = inet->tos; + __u8 dscp = READ_ONCE(inet->tos); __be16 df = 0; pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b1fc8afd072d..61a2a1988ce6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -716,7 +716,7 @@ run_test_transparent() # the required infrastructure in MPTCP sockopt code. To support TOS, the # following function has been exported (T). Not great but better than # checking for a specific kernel version. - if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then + if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then echo "INFO: ${msg} not supported by the kernel: SKIP" mptcp_lib_result_skip "${TEST_GROUP}" return -- cgit v1.2.3 From a4725d0d893599253a4bb283fdabdd4a66d9451d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:17 +0000 Subject: inet: lockless getsockopt(IP_OPTIONS) inet->inet_opt being RCU protected, we can use RCU instead of locking the socket. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 50c008efbb6d..45d89487914a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1591,27 +1591,20 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TOS: val = READ_ONCE(inet->tos); goto copyval; - } - - if (needs_rtnl) - rtnl_lock(); - sockopt_lock_sock(sk); - - switch (optname) { case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; struct ip_options *opt = (struct ip_options *)optbuf; struct ip_options_rcu *inet_opt; - inet_opt = rcu_dereference_protected(inet->inet_opt, - lockdep_sock_is_held(sk)); + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, sizeof(struct ip_options) + inet_opt->opt.optlen); - sockopt_release_sock(sk); + rcu_read_unlock(); if (opt->optlen == 0) { len = 0; @@ -1627,6 +1620,13 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } + } + + if (needs_rtnl) + rtnl_lock(); + sockopt_lock_sock(sk); + + switch (optname) { case IP_MTU: { struct dst_entry *dst; -- cgit v1.2.3 From 3523bc91e4b4da39ccf18a0252d13108877ece0a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:18 +0000 Subject: inet: lockless getsockopt(IP_MTU) sk_dst_get() does not require socket lock. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 45d89487914a..04579e390ddd 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1620,13 +1620,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } - } - - if (needs_rtnl) - rtnl_lock(); - sockopt_lock_sock(sk); - - switch (optname) { case IP_MTU: { struct dst_entry *dst; @@ -1636,12 +1629,17 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, val = dst_mtu(dst); dst_release(dst); } - if (!val) { - sockopt_release_sock(sk); + if (!val) return -ENOTCONN; - } - break; + goto copyval; + } } + + if (needs_rtnl) + rtnl_lock(); + sockopt_lock_sock(sk); + + switch (optname) { case IP_UNICAST_IF: val = (__force int)htonl((__u32) inet->uc_index); break; -- cgit v1.2.3 From 959d5c11601b2b337c364b2e3102d392365e3dd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:19 +0000 Subject: inet: implement lockless getsockopt(IP_UNICAST_IF) Add missing READ_ONCE() annotations when reading inet->uc_index Implementing getsockopt(IP_UNICAST_IF) locklessly seems possible, the setsockopt() part might not be possible at the moment. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/datagram.c | 2 +- net/ipv4/ip_sockglue.c | 10 +++++----- net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 13 +++++++------ net/ipv4/udp.c | 12 +++++++----- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index cb5dbee9e018..1480e9ebdfef 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -43,7 +43,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (!saddr) saddr = inet->mc_addr; } else if (!oif) { - oif = inet->uc_index; + oif = READ_ONCE(inet->uc_index); } fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 04579e390ddd..58995526c6e9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1113,7 +1113,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, ifindex = (__force int)ntohl((__force __be32)val); if (ifindex == 0) { - inet->uc_index = 0; + WRITE_ONCE(inet->uc_index, 0); err = 0; break; } @@ -1130,7 +1130,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if) break; - inet->uc_index = ifindex; + WRITE_ONCE(inet->uc_index, ifindex); err = 0; break; } @@ -1633,6 +1633,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -ENOTCONN; goto copyval; } + case IP_UNICAST_IF: + val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); + goto copyval; } if (needs_rtnl) @@ -1640,9 +1643,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, sockopt_lock_sock(sk); switch (optname) { - case IP_UNICAST_IF: - val = (__force int)htonl((__u32) inet->uc_index); - break; case IP_MULTICAST_IF: { struct in_addr addr; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 50d12b0c8d46..66ad1f95af49 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -777,7 +777,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!saddr) saddr = inet->mc_addr; } else if (!ipc.oif) - ipc.oif = inet->uc_index; + ipc.oif = READ_ONCE(inet->uc_index); flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ade1aecd7c71..e2357d23202e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -482,7 +482,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int free = 0; __be32 daddr; __be32 saddr; - int err; + int uc_index, err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; int hdrincl; @@ -576,24 +576,25 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); + uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } else if (!ipc.oif) { - ipc.oif = inet->uc_index; - } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + ipc.oif = uc_index; + } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast * and uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. 
*/ - if (ipc.oif != inet->uc_index && + if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), - inet->uc_index)) { - ipc.oif = inet->uc_index; + uc_index)) { + ipc.oif = uc_index; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 731a723dc808..1e0c3aba1e5a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1055,6 +1055,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; + int uc_index; if (len > 0xFFFF) return -EMSGSIZE; @@ -1173,6 +1174,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (scope == RT_SCOPE_LINK) connected = 0; + uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = inet->mc_index; @@ -1180,18 +1182,18 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) saddr = inet->mc_addr; connected = 0; } else if (!ipc.oif) { - ipc.oif = inet->uc_index; - } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + ipc.oif = uc_index; + } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast and * uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. */ - if (ipc.oif != inet->uc_index && + if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), - inet->uc_index)) { - ipc.oif = inet->uc_index; + uc_index)) { + ipc.oif = uc_index; } } -- cgit v1.2.3 From c4480eb5504c9771f935cbca58a3b874bdd36af8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:20 +0000 Subject: inet: lockless IP_PKTOPTIONS implementation Current implementation is already lockless, because the socket lock is released before reading socket fields. Add missing READ_ONCE() annotations. Note that corresponding WRITE_ONCE() are needed, the order of the patches do not really matter. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 76 ++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 58995526c6e9..1ee01ff64171 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1633,6 +1633,43 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -ENOTCONN; goto copyval; } + case IP_PKTOPTIONS: + { + struct msghdr msg; + + if (sk->sk_type != SOCK_STREAM) + return -ENOPROTOOPT; + + if (optval.is_kernel) { + msg.msg_control_is_user = false; + msg.msg_control = optval.kernel; + } else { + msg.msg_control_is_user = true; + msg.msg_control_user = optval.user; + } + msg.msg_controllen = len; + msg.msg_flags = in_compat_syscall() ? 
MSG_CMSG_COMPAT : 0; + + if (inet_test_bit(PKTINFO, sk)) { + struct in_pktinfo info; + + info.ipi_addr.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_spec_dst.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_ifindex = READ_ONCE(inet->mc_index); + put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); + } + if (inet_test_bit(TTL, sk)) { + int hlim = READ_ONCE(inet->mc_ttl); + + put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); + } + if (inet_test_bit(TOS, sk)) { + int tos = READ_ONCE(inet->rcv_tos); + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } + len -= msg.msg_controllen; + return copy_to_sockptr(optlen, &len, sizeof(int)); + } case IP_UNICAST_IF: val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); goto copyval; @@ -1678,45 +1715,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, else err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; - case IP_PKTOPTIONS: - { - struct msghdr msg; - - sockopt_release_sock(sk); - - if (sk->sk_type != SOCK_STREAM) - return -ENOPROTOOPT; - - if (optval.is_kernel) { - msg.msg_control_is_user = false; - msg.msg_control = optval.kernel; - } else { - msg.msg_control_is_user = true; - msg.msg_control_user = optval.user; - } - msg.msg_controllen = len; - msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0; - - if (inet_test_bit(PKTINFO, sk)) { - struct in_pktinfo info; - - info.ipi_addr.s_addr = inet->inet_rcv_saddr; - info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; - info.ipi_ifindex = inet->mc_index; - put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); - } - if (inet_test_bit(TTL, sk)) { - int hlim = READ_ONCE(inet->mc_ttl); - - put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); - } - if (inet_test_bit(TOS, sk)) { - int tos = inet->rcv_tos; - put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); - } - len -= msg.msg_controllen; - return copy_to_sockptr(optlen, &len, sizeof(int)); - } case IP_LOCAL_PORT_RANGE: val = inet->local_port_range.hi << 16 | inet->local_port_range.lo; break; -- cgit v1.2.3 From 02715925222c137f418ecac417b68c7801e8f729 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 03:42:21 +0000 Subject: inet: implement lockless getsockopt(IP_MULTICAST_IF) Add missing annotations to inet->mc_index and inet->mc_addr to fix data-races. getsockopt(IP_MULTICAST_IF) can be lockless. setsockopt() side is left for later. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv4/datagram.c | 4 ++-- net/ipv4/ip_sockglue.c | 25 ++++++++++++------------- net/ipv4/ping.c | 4 ++-- net/ipv4/raw.c | 4 ++-- net/ipv4/udp.c | 4 ++-- 5 files changed, 20 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 1480e9ebdfef..2cc50cbfc2a3 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -39,9 +39,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { if (!oif || netif_index_is_l3_master(sock_net(sk), oif)) - oif = inet->mc_index; + oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!oif) { oif = READ_ONCE(inet->uc_index); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 1ee01ff64171..0b74ac49d6a6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1168,8 +1168,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, if (!mreq.imr_ifindex) { if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { - inet->mc_index = 0; - inet->mc_addr = 0; + WRITE_ONCE(inet->mc_index, 0); + WRITE_ONCE(inet->mc_addr, 0); err = 0; break; } @@ -1194,8 +1194,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, midx != sk->sk_bound_dev_if) break; - inet->mc_index = mreq.imr_ifindex; - inet->mc_addr = mreq.imr_address.s_addr; + WRITE_ONCE(inet->mc_index, mreq.imr_ifindex); + WRITE_ONCE(inet->mc_addr, mreq.imr_address.s_addr); err = 0; break; } @@ -1673,19 +1673,11 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_UNICAST_IF: val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); goto copyval; - } - - if (needs_rtnl) - rtnl_lock(); - sockopt_lock_sock(sk); - - switch (optname) { case IP_MULTICAST_IF: { struct in_addr addr; len = min_t(unsigned int, len, sizeof(struct in_addr)); - addr.s_addr = inet->mc_addr; - sockopt_release_sock(sk); + addr.s_addr = READ_ONCE(inet->mc_addr); if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; @@ -1693,6 +1685,13 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } + } + + if (needs_rtnl) + rtnl_lock(); + sockopt_lock_sock(sk); + + switch (optname) { case IP_MSFILTER: { struct ip_msfilter msf; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 66ad1f95af49..2c61f444e1c7 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -773,9 +773,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) ipc.oif = READ_ONCE(inet->uc_index); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e2357d23202e..27da9d7294c0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -579,9 +579,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) { ipc.oif = uc_index; } else if (ipv4_is_lbcast(daddr) && uc_index) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1e0c3aba1e5a..7f7724beca33 100644 --- 
a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1177,9 +1177,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); connected = 0; } else if (!ipc.oif) { ipc.oif = uc_index; -- cgit v1.2.3 From e7b34822fa4dcf6101deb3d51a77efd77533571d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:28:52 -0700 Subject: net: openvswitch: Annotate struct dp_meter_instance with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct dp_meter_instance. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Pravin B Shelar Cc: dev@openvswitch.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922172858.3822653-10-keescook@chromium.org Signed-off-by: Jakub Kicinski --- net/openvswitch/meter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h index 0c33889a8515..013de694221f 100644 --- a/net/openvswitch/meter.h +++ b/net/openvswitch/meter.h @@ -45,7 +45,7 @@ struct dp_meter { struct dp_meter_instance { struct rcu_head rcu; u32 n_meters; - struct dp_meter __rcu *dp_meters[]; + struct dp_meter __rcu *dp_meters[] __counted_by(n_meters); }; struct dp_meter_table { -- cgit v1.2.3 From 16ae53d80c00445c903128f2a64af87b5a03d474 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:28:54 -0700 Subject: net: openvswitch: Annotate struct dp_meter with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct dp_meter. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Pravin B Shelar Cc: dev@openvswitch.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922172858.3822653-12-keescook@chromium.org Signed-off-by: Jakub Kicinski --- net/openvswitch/meter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h index 013de694221f..ed11cd12b512 100644 --- a/net/openvswitch/meter.h +++ b/net/openvswitch/meter.h @@ -39,7 +39,7 @@ struct dp_meter { u32 max_delta_t; u64 used; struct ovs_flow_stats stats; - struct dp_meter_band bands[]; + struct dp_meter_band bands[] __counted_by(n_bands); }; struct dp_meter_instance { -- cgit v1.2.3 From a6b07a51b161ba1ad3d81919955fe77b697f9d48 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 21 Sep 2023 09:07:40 -0400 Subject: handshake: Fix sign of socket file descriptor fields Socket file descriptors are signed integers. Use nla_get/put_s32 for those to avoid implicit signed conversion in the netlink protocol. 
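As an aside, the underlying problem is easy to show outside the kernel. The following is a minimal userspace illustration (not part of the patch) of what happens when a signed descriptor is carried in an unsigned 32-bit attribute; the kernel side simply switches to the nla_put_s32()/nla_get_s32() accessors so both ends agree on signedness:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int fd = -1;                     /* sentinel/invalid descriptor */
	uint32_t wire = (uint32_t)fd;    /* what a u32 attribute would carry */

	printf("read back as u32: %u\n", wire);           /* 4294967295 */
	printf("read back as s32: %d\n", (int32_t)wire);  /* -1 */
	return 0;
}
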
Signed-off-by: Chuck Lever Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/169530165057.8905.8650469415145814828.stgit@oracle-102.nfsv4bat.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/handshake.yaml | 4 ++-- net/handshake/genl.c | 2 +- net/handshake/netlink.c | 2 +- net/handshake/tlshd.c | 2 +- tools/net/ynl/generated/handshake-user.h | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index 6d89e30f5fd5..a49b46b80e16 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -43,7 +43,7 @@ attribute-sets: attributes: - name: sockfd - type: u32 + type: s32 - name: handler-class type: u32 @@ -79,7 +79,7 @@ attribute-sets: type: u32 - name: sockfd - type: u32 + type: s32 - name: remote-auth type: u32 diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 233be5cbfec9..f55d14d7b726 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -18,7 +18,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN /* HANDSHAKE_CMD_DONE - do */ static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, - [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, }, [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, }; diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index d0bc1dd8e65a..64a0046dd611 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -163,7 +163,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) return -EINVAL; - fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); + fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); sock = sockfd_lookup(fd, &err); if (!sock) diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index bbfb4095ddd6..7ac80201aa1f 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -214,7 +214,7 @@ static int tls_handshake_accept(struct handshake_req *req, goto out_cancel; ret = -EMSGSIZE; - ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd); + ret = nla_put_s32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd); if (ret < 0) goto out_cancel; ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type); diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h index 47646bb91cea..f8e481fa9e09 100644 --- a/tools/net/ynl/generated/handshake-user.h +++ b/tools/net/ynl/generated/handshake-user.h @@ -65,7 +65,7 @@ struct handshake_accept_rsp { __u32 peername_len; } _present; - __u32 sockfd; + __s32 sockfd; enum handshake_msg_type message_type; __u32 timeout; enum handshake_auth auth_mode; @@ -104,7 +104,7 @@ struct handshake_done_req { } _present; __u32 status; - __u32 sockfd; + __s32 sockfd; unsigned int n_remote_auth; __u32 *remote_auth; }; @@ -122,7 +122,7 @@ handshake_done_req_set_status(struct handshake_done_req *req, __u32 status) req->status = status; } static inline void -handshake_done_req_set_sockfd(struct handshake_done_req *req, __u32 sockfd) +handshake_done_req_set_sockfd(struct handshake_done_req *req, __s32 sockfd) { req->_present.sockfd = 1; req->sockfd = sockfd; -- cgit v1.2.3 From 160f404495aa9282cac99b518d1b379e31ef1bdd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 21 Sep 2023 09:08:07 -0400 Subject: 
handshake: Fix sign of key_serial_t fields key_serial_t fields are signed integers. Use nla_get/put_s32 for those to avoid implicit signed conversion in the netlink protocol. Signed-off-by: Chuck Lever Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/169530167716.8905.645746457741372879.stgit@oracle-102.nfsv4bat.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/handshake.yaml | 4 ++-- net/handshake/tlshd.c | 4 ++-- tools/net/ynl/generated/handshake-user.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index a49b46b80e16..b934cc513e3d 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -34,10 +34,10 @@ attribute-sets: attributes: - name: cert - type: u32 + type: s32 - name: privkey - type: u32 + type: s32 - name: accept attributes: diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 7ac80201aa1f..d697f68c598c 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -173,9 +173,9 @@ static int tls_handshake_put_certificate(struct sk_buff *msg, if (!entry_attr) return -EMSGSIZE; - if (nla_put_u32(msg, HANDSHAKE_A_X509_CERT, + if (nla_put_s32(msg, HANDSHAKE_A_X509_CERT, treq->th_certificate) || - nla_put_u32(msg, HANDSHAKE_A_X509_PRIVKEY, + nla_put_s32(msg, HANDSHAKE_A_X509_PRIVKEY, treq->th_privkey)) { nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h index f8e481fa9e09..2b34acc608de 100644 --- a/tools/net/ynl/generated/handshake-user.h +++ b/tools/net/ynl/generated/handshake-user.h @@ -28,8 +28,8 @@ struct handshake_x509 { __u32 privkey:1; } _present; - __u32 cert; - __u32 privkey; + __s32 cert; + __s32 privkey; }; /* ============== HANDSHAKE_CMD_ACCEPT ============== */ -- cgit v1.2.3 From cbc3a153222805d65f821e10f4f78b6afce06f86 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 22:03:53 +0000 Subject: tcp_metrics: add missing barriers on delete When removing an item from RCU protected list, we must prevent store-tearing, using rcu_assign_pointer() or WRITE_ONCE(). Fixes: 04f721c671656 ("tcp_metrics: Rewrite tcp_metrics_flush_all") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Neal Cardwell Signed-off-by: Paolo Abeni --- net/ipv4/tcp_metrics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c196759f1d3b..4bfa2fb27de5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -908,7 +908,7 @@ static void tcp_metrics_flush_all(struct net *net) match = net ? 
net_eq(tm_net(tm), net) : !refcount_read(&tm_net(tm)->ns.count); if (match) { - *pp = tm->tcpm_next; + rcu_assign_pointer(*pp, tm->tcpm_next); kfree_rcu(tm, rcu_head); } else { pp = &tm->tcpm_next; @@ -949,7 +949,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) if (addr_same(&tm->tcpm_daddr, &daddr) && (!src || addr_same(&tm->tcpm_saddr, &saddr)) && net_eq(tm_net(tm), net)) { - *pp = tm->tcpm_next; + rcu_assign_pointer(*pp, tm->tcpm_next); kfree_rcu(tm, rcu_head); found = true; } else { -- cgit v1.2.3 From 081480014a64a69d901f8ef1ffdd56d6085cf87e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 22:03:54 +0000 Subject: tcp_metrics: properly set tp->snd_ssthresh in tcp_init_metrics() We need to set tp->snd_ssthresh to TCP_INFINITE_SSTHRESH in the case tcp_get_metrics() fails for some reason. Fixes: 9ad7c049f0f7 ("tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Neal Cardwell Signed-off-by: Paolo Abeni --- net/ipv4/tcp_metrics.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4bfa2fb27de5..0c03f564878f 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -470,6 +470,10 @@ void tcp_init_metrics(struct sock *sk) u32 val, crtt = 0; /* cached RTT scaled by 8 */ sk_dst_confirm(sk); + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; if (!dst) goto reset; @@ -489,11 +493,6 @@ void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = val; if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; - } else { - /* ssthresh may have been reduced unnecessarily during. - * 3WHS. Restore it back to its initial default. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) -- cgit v1.2.3 From a135798e6e200ecb2f864cecca6d257ba278370c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 22:03:55 +0000 Subject: tcp_metrics: do not create an entry from tcp_init_metrics() tcp_init_metrics() only wants to get metrics if they were previously stored in the cache. Creating an entry is adding useless costs, especially when tcp_no_metrics_save is set. Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Neal Cardwell Signed-off-by: Paolo Abeni --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0c03f564878f..7aca12c59c18 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -478,7 +478,7 @@ void tcp_init_metrics(struct sock *sk) goto reset; rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); + tm = tcp_get_metrics(sk, dst, false); if (!tm) { rcu_read_unlock(); goto reset; -- cgit v1.2.3 From 6532e257aa73645e28dee5b2232cc3c88be62083 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Sep 2023 22:03:56 +0000 Subject: tcp_metrics: optimize tcp_metrics_flush_all() This is inspired by several syzbot reports where tcp_metrics_flush_all() was seen in the traces. We can avoid acquiring tcp_metrics_lock for empty buckets, and we should add one cond_resched() to break potential long loops. 
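A hedged sketch of that pattern, detached from the tcp_metrics data structures (the bucket and lock names below are placeholders, not the real fields, and this is kernel-style code that assumes the usual kernel headers rather than a standalone program): peek at the RCU-protected chain head before taking the spinlock, and give the scheduler a chance between rows.

static void flush_all_buckets(struct bucket *hb, unsigned int max_rows)
{
	unsigned int row;

	for (row = 0; row < max_rows; row++, hb++) {
		/* Unlocked peek: empty buckets are skipped almost for free. */
		if (!rcu_access_pointer(hb->chain))
			continue;

		spin_lock_bh(&table_lock);
		/* ... walk the chain, unlink matches with rcu_assign_pointer(),
		 * free them with kfree_rcu() ...
		 */
		spin_unlock_bh(&table_lock);

		/* Don't monopolize the CPU when there are many rows to scan. */
		cond_resched();
	}
}
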
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Neal Cardwell Signed-off-by: Paolo Abeni --- net/ipv4/tcp_metrics.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7aca12c59c18..c2a925538542 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -898,11 +898,13 @@ static void tcp_metrics_flush_all(struct net *net) unsigned int row; for (row = 0; row < max_rows; row++, hb++) { - struct tcp_metrics_block __rcu **pp; + struct tcp_metrics_block __rcu **pp = &hb->chain; bool match; + if (!rcu_access_pointer(*pp)) + continue; + spin_lock_bh(&tcp_metrics_lock); - pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { match = net ? net_eq(tm_net(tm), net) : !refcount_read(&tm_net(tm)->ns.count); @@ -914,6 +916,7 @@ static void tcp_metrics_flush_all(struct net *net) } } spin_unlock_bh(&tcp_metrics_lock); + cond_resched(); } } -- cgit v1.2.3 From ef35bed6fad6eda8de0277eda77803c748f306d1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 24 Sep 2023 10:03:07 +0200 Subject: udp_tunnel: Use flex array to simplify code 'n_tables' is small, UDP_TUNNEL_NIC_MAX_TABLES = 4 as a maximum. So there is no real point to allocate the 'entries' pointers array with a dedicate memory allocation. Using a flexible array for struct udp_tunnel_nic->entries avoids the overhead of an additional memory allocation. This also saves an indirection when the array is accessed. Finally, __counted_by() can be used for run-time bounds checking if configured and supported by the compiler. Signed-off-by: Christophe JAILLET Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/4a096ba9cf981a588aa87235bb91e933ee162b3d.1695542544.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- net/ipv4/udp_tunnel_nic.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 029219749785..b6d2d16189c0 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -47,7 +47,7 @@ struct udp_tunnel_nic { unsigned int n_tables; unsigned long missed; - struct udp_tunnel_nic_table_entry **entries; + struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables); }; /* We ensure all work structs are done using driver state, but not the code. 
@@ -725,16 +725,12 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, struct udp_tunnel_nic *utn; unsigned int i; - utn = kzalloc(sizeof(*utn), GFP_KERNEL); + utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL); if (!utn) return NULL; utn->n_tables = n_tables; INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); - utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL); - if (!utn->entries) - goto err_free_utn; - for (i = 0; i < n_tables; i++) { utn->entries[i] = kcalloc(info->tables[i].n_entries, sizeof(*utn->entries[i]), GFP_KERNEL); @@ -747,8 +743,6 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, err_free_prev_entries: while (i--) kfree(utn->entries[i]); - kfree(utn->entries); -err_free_utn: kfree(utn); return NULL; } @@ -759,7 +753,6 @@ static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) for (i = 0; i < utn->n_tables; i++) kfree(utn->entries[i]); - kfree(utn->entries); kfree(utn); } -- cgit v1.2.3 From f25e621f5d4c423fa7217afbddd427007b0e0ec0 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Sun, 24 Sep 2023 17:30:14 +0200 Subject: ipv6: mark address parameters of udp_tunnel6_xmit_skb() as const The function doesn't modify the addresses passed as input, mark them as 'const' to make that clear. Signed-off-by: Beniamino Galvani Reviewed-by: Guillaume Nault Link: https://lore.kernel.org/r/20230924153014.786962-1-b.galvani@gmail.com Signed-off-by: Paolo Abeni --- include/net/udp_tunnel.h | 5 +++-- net/ipv6/ip6_udp_tunnel.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 29251c3519cf..21ba0a25f936 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -154,8 +154,9 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index cdc4d4ee2420..70d38705c92f 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -75,8 +75,9 @@ EXPORT_SYMBOL_GPL(udp_sock_create6); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck) { -- cgit v1.2.3 From fefe5dc4afeafe896c90d5b20b605f2759343c3b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 22 Sep 2023 15:31:04 +0200 Subject: net: dsa: propagate extack to ds->ops->port_hsr_join() Drivers can provide meaningful error messages which state a reason why they can't perform an offload, and dsa_slave_changeupper() already has the infrastructure to propagate these over netlink rather than printing to the kernel log. So pass the extack argument and modify the xrs700x driver's port_hsr_join() prototype. Also take the opportunity and use the extack for the 2 -EOPNOTSUPP cases from xrs700x_hsr_join(). 
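For drivers other than xrs700x the same pattern applies. A hedged sketch of a hypothetical driver callback (the foo_ name is invented for illustration, not taken from the patch) shows how the refusal reason now travels back through the extended ack instead of only the kernel log, where tools that print extended acks (e.g. iproute2) can show it to the user:

static int foo_port_hsr_join(struct dsa_switch *ds, int port,
			     struct net_device *hsr,
			     struct netlink_ext_ack *extack)
{
	if (port != 1 && port != 2) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Only ports 1 and 2 can offload HSR/PRP");
		return -EOPNOTSUPP;
	}

	/* ... program the switch for HSR/PRP offload ... */
	return 0;
}
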
Signed-off-by: Vladimir Oltean Signed-off-by: Lukasz Majewski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/dsa/xrs700x/xrs700x.c | 18 ++++++++++++------ include/net/dsa.h | 3 ++- net/dsa/port.c | 5 +++-- net/dsa/port.h | 3 ++- net/dsa/slave.c | 2 +- 5 files changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 753fef757f11..5b02e9e426fd 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -548,7 +548,8 @@ static void xrs700x_bridge_leave(struct dsa_switch *ds, int port, } static int xrs700x_hsr_join(struct dsa_switch *ds, int port, - struct net_device *hsr) + struct net_device *hsr, + struct netlink_ext_ack *extack) { unsigned int val = XRS_HSR_CFG_HSR_PRP; struct dsa_port *partner = NULL, *dp; @@ -562,16 +563,21 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port, if (ret) return ret; - /* Only ports 1 and 2 can be HSR/PRP redundant ports. */ - if (port != 1 && port != 2) + if (port != 1 && port != 2) { + NL_SET_ERR_MSG_MOD(extack, + "Only ports 1 and 2 can offload HSR/PRP"); return -EOPNOTSUPP; + } - if (ver == HSR_V1) + if (ver == HSR_V1) { val |= XRS_HSR_CFG_HSR; - else if (ver == PRP_V1) + } else if (ver == PRP_V1) { val |= XRS_HSR_CFG_PRP; - else + } else { + NL_SET_ERR_MSG_MOD(extack, + "Only HSR v1 and PRP v1 can be offloaded"); return -EOPNOTSUPP; + } dsa_hsr_foreach_port(dp, ds, hsr) { if (dp->index != port) { diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b9c6aa27047..426724808e76 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1198,7 +1198,8 @@ struct dsa_switch_ops { * HSR integration */ int (*port_hsr_join)(struct dsa_switch *ds, int port, - struct net_device *hsr); + struct net_device *hsr, + struct netlink_ext_ack *extack); int (*port_hsr_leave)(struct dsa_switch *ds, int port, struct net_device *hsr); diff --git a/net/dsa/port.c b/net/dsa/port.c index 37ab238e8304..5f01bd4f9dec 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -2024,7 +2024,8 @@ void dsa_shared_port_link_unregister_of(struct dsa_port *dp) dsa_shared_port_setup_phy_of(dp, false); } -int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; int err; @@ -2034,7 +2035,7 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) dp->hsr_dev = hsr; - err = ds->ops->port_hsr_join(ds, dp->index, hsr); + err = ds->ops->port_hsr_join(ds, dp->index, hsr, extack); if (err) dp->hsr_dev = NULL; diff --git a/net/dsa/port.h b/net/dsa/port.h index dc812512fd0e..334879964e2c 100644 --- a/net/dsa/port.h +++ b/net/dsa/port.h @@ -103,7 +103,8 @@ int dsa_port_phylink_create(struct dsa_port *dp); void dsa_port_phylink_destroy(struct dsa_port *dp); int dsa_shared_port_link_register_of(struct dsa_port *dp); void dsa_shared_port_link_unregister_of(struct dsa_port *dp); -int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr, + struct netlink_ext_ack *extack); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 48db91b33390..2b3d89b77121 100644 --- 
a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2862,7 +2862,7 @@ static int dsa_slave_changeupper(struct net_device *dev, } } else if (is_hsr_master(info->upper_dev)) { if (info->linking) { - err = dsa_port_hsr_join(dp, info->upper_dev); + err = dsa_port_hsr_join(dp, info->upper_dev, extack); if (err == -EOPNOTSUPP) { NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported"); -- cgit v1.2.3 From 6715042cd112e1db971583e2eed89b90d6f9b139 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 22 Sep 2023 15:31:05 +0200 Subject: net: dsa: notify drivers of MAC address changes on user ports In some cases, drivers may need to veto the changing of a MAC address on a user port. Such is the case with KSZ9477 when it offloads a HSR device, because it programs the MAC address of multiple ports to a shared hardware register. Those ports need to have equal MAC addresses for the lifetime of the HSR offload. Signed-off-by: Vladimir Oltean Signed-off-by: Lukasz Majewski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- include/net/dsa.h | 10 ++++++++++ net/dsa/slave.c | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 426724808e76..d98439ea6146 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -969,6 +969,16 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + + /* + * Notification for MAC address changes on user ports. Drivers can + * currently only veto operations. They should not use the method to + * program the hardware, since the operation is not rolled back in case + * of other errors. + */ + int (*port_set_mac_address)(struct dsa_switch *ds, int port, + const unsigned char *addr); + /* * Compatibility between device trees defining multiple CPU ports and * drivers which are not OK to use by default the numerically smallest diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2b3d89b77121..4c3e502d7e16 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -457,6 +457,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + if (ds->ops->port_set_mac_address) { + err = ds->ops->port_set_mac_address(ds, dp->index, + addr->sa_data); + if (err) + return err; + } + /* If the port is down, the address isn't synced yet to hardware or * to the DSA master, so there is nothing to change. */ -- cgit v1.2.3 From 5e5db71a92c537ff37149b58bba6a8b2588a46f0 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Fri, 22 Sep 2023 15:31:06 +0200 Subject: net: dsa: tag_ksz: Extend ksz9477_xmit() for HSR frame duplication The KSZ9477 has support for HSR (High-Availability Seamless Redundancy). One of its offloading (i.e. performed in the switch IC hardware) features is to duplicate received frame to both HSR aware switch ports. To achieve this goal - the tail TAG needs to be modified. To be more specific, both ports must be marked as destination (egress) ones. The NETIF_F_HW_HSR_DUP flag indicates that the device supports HSR and assures (in HSR core code) that frame is sent only once from HOST to switch with tail tag indicating both ports. 
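Tying the two preceding patches together, a driver that offloads HSR and shares one MAC register across the member ports could use the new veto hook roughly as follows. This is a hypothetical sketch (the foo_ private structure and its hsr_ports mask are invented for illustration), not the KSZ9477 implementation:

static int foo_port_set_mac_address(struct dsa_switch *ds, int port,
				    const unsigned char *addr)
{
	struct foo_priv *priv = ds->priv;

	/* While HSR is offloaded, both member ports share a single
	 * hardware MAC address register, so refuse changes on them.
	 */
	if (priv->hsr_ports & BIT(port))
		return -EBUSY;

	/* Veto-only hook: the actual programming happens elsewhere. */
	return 0;
}
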
Signed-off-by: Lukasz Majewski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- net/dsa/tag_ksz.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index ea100bd25939..3632e47dea9e 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -293,6 +293,14 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, if (is_link_local_ether_addr(hdr->h_dest)) val |= KSZ9477_TAIL_TAG_OVERRIDE; + if (dev->features & NETIF_F_HW_HSR_DUP) { + struct net_device *hsr_dev = dp->hsr_dev; + struct dsa_port *other_dp; + + dsa_hsr_foreach_port(other_dp, dp->ds, hsr_dev) + val |= BIT(other_dp->index); + } + *tag = cpu_to_be16(val); return ksz_defer_xmit(dp, skb); -- cgit v1.2.3 From 473267a4911f2469722c74ca58087d951072f72a Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Mon, 25 Sep 2023 14:47:11 -0700 Subject: net: add sysctl to disable rfc4862 5.5.3e lifetime handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a sysctl to opt-out of RFC4862 section 5.5.3e's valid lifetime derivation mechanism. RFC4862 section 5.5.3e prescribes that the valid lifetime in a Router Advertisement PIO shall be ignored if it less than 2 hours and to reset the lifetime of the corresponding address to 2 hours. An in-progress 6man draft (see draft-ietf-6man-slaac-renum-07 section 4.2) is currently looking to remove this mechanism. While this draft has not been moving particularly quickly for other reasons, there is widespread consensus on section 4.2 which updates RFC4862 section 5.5.3e. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: Jen Linkova Signed-off-by: Patrick Rohr Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230925214711.959704-1-prohr@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 11 ++++++++++ include/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 38 ++++++++++++++++++++++------------ 3 files changed, 37 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 5bfa1837968c..f7dfde3b09a9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2311,6 +2311,17 @@ accept_ra_pinfo - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_honor_pio_life - BOOLEAN + Whether to use RFC4862 Section 5.5.3e to determine the valid + lifetime of an address matching a prefix sent in a Router + Advertisement Prefix Information Option. + + - If enabled, the PIO valid lifetime will always be honored. + - If disabled, RFC4862 section 5.5.3e is used to determine + the valid lifetime of the address. + + Default: 0 (disabled) + accept_ra_rt_info_min_plen - INTEGER Minimum prefix length of Route Information in RA. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e400ff757f13..5e605e384aac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -82,6 +82,7 @@ struct ipv6_devconf { __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0b6ee962c84e..c2d471ad7922 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -2657,22 +2659,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = !create && stored_lft; + + if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. - */ - update_lft = 1; } if (update_lft) { @@ -6846,6 +6849,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ra_honor_pio_life", + .data = &ipv6_devconf.ra_honor_pio_life, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", -- cgit v1.2.3 From dd8bb80308c474ea0e7ddb244962cb2d7001bae2 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Aug 2023 14:46:56 +0800 Subject: can: raw: Remove NULL check before dev_{put, hold} The call netdev_{put, hold} of dev_{put, hold} will check NULL, so there is no need to check before using dev_{put, hold}, remove it to silence the warning: ./net/can/raw.c:497:2-9: WARNING: NULL check before dev_{put, hold} functions is not needed. 
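The helpers already tolerate a NULL device on their own, roughly like this
(simplified from include/linux/netdevice.h, shown only to illustrate why the
caller-side check adds nothing):

	static inline void netdev_put(struct net_device *dev,
				      netdevice_tracker *tracker)
	{
		if (dev) {
			/* release the ref tracker and drop the reference */
		}
	}

	static inline void dev_put(struct net_device *dev)
	{
		netdev_put(dev, NULL);
	}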
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6231 Signed-off-by: Jiapeng Chong Reported-by: Simon Horman Acked-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230825064656.87751-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 73468d2ebd51..e6b822624ba2 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -493,8 +493,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) out_put_dev: /* remove potential reference from dev_get_by_index() */ - if (dev) - dev_put(dev); + dev_put(dev); out: release_sock(sk); rtnl_unlock(); -- cgit v1.2.3 From 2b464cc2fd57c5a0250ae2f31505ac8e26e9748c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Sep 2023 14:17:48 +0200 Subject: sctp: Spelling s/preceeding/preceding/g Fix a misspelling of "preceding". Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/663b14d07d6d716ddc34482834d6b65a2f714cfb.1695903447.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 08527d882e56..f80208edd6a5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3303,7 +3303,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, /* Process the TLVs contained within the ASCONF chunk. */ sctp_walk_params(param, addip) { - /* Skip preceeding address parameters. */ + /* Skip preceding address parameters. */ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || param.p->type == SCTP_PARAM_IPV6_ADDRESS) continue; -- cgit v1.2.3 From d86e5fbd4c965fdda72f99ccd54a1031ea4df51d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Oct 2023 18:19:20 +0000 Subject: net: skb_queue_purge_reason() optimizations 1) Exit early if the list is empty. 2) splice the list into a local list, so that we block hard irqs only once. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231003181920.3280453-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2198979470ec..da3f96bdd6f6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3722,10 +3722,19 @@ EXPORT_SYMBOL(skb_dequeue_tail); void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { - struct sk_buff *skb; + struct sk_buff_head tmp; + unsigned long flags; + + if (skb_queue_empty_lockless(list)) + return; + + __skb_queue_head_init(&tmp); + + spin_lock_irqsave(&list->lock, flags); + skb_queue_splice_init(list, &tmp); + spin_unlock_irqrestore(&list->lock, flags); - while ((skb = skb_dequeue(list)) != NULL) - kfree_skb_reason(skb, reason); + __skb_queue_purge_reason(&tmp, reason); } EXPORT_SYMBOL(skb_queue_purge_reason); -- cgit v1.2.3 From 2ae45136a93839326f8fdc1286a4b0457da97ce8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Oct 2023 13:17:35 +0000 Subject: net_sched: sch_fq: remove q->ktime_cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that both enqueue() and dequeue() need to use ktime_get_ns(), there is no point wasting 8 bytes in struct fq_sched_data. 
This makes room for future fields. ;) Signed-off-by: Eric Dumazet Acked-by: Dave Taht Reviewed-by: Willem de Bruijn Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Paolo Abeni --- net/sched/sch_fq.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index d35419db7b94..818ac786379d 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -106,7 +106,6 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; - u64 ktime_cache; /* copy of last ktime_get_ns() */ unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ @@ -282,12 +281,13 @@ static void fq_gc(struct fq_sched_data *q, * * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure. */ -static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb) +static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, + u64 now) { const struct fq_sched_data *q = qdisc_priv(sch); const struct sock *sk; - if (fq_skb_cb(skb)->time_to_send > q->ktime_cache) + if (fq_skb_cb(skb)->time_to_send > now) return false; if (sch->q.qlen != 0) { @@ -317,7 +317,8 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb) return true; } -static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb) +static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, + u64 now) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_node **p, *parent; @@ -360,7 +361,7 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb) sk = (struct sock *)((hash << 1) | 1UL); } - if (fq_fastpath_check(sch, skb)) { + if (fq_fastpath_check(sch, skb, now)) { q->internal.stat_fastpath_packets++; return &q->internal; } @@ -497,9 +498,9 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) } static bool fq_packet_beyond_horizon(const struct sk_buff *skb, - const struct fq_sched_data *q) + const struct fq_sched_data *q, u64 now) { - return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); + return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -507,27 +508,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; + u64 now; if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); - q->ktime_cache = ktime_get_ns(); + now = ktime_get_ns(); if (!skb->tstamp) { - fq_skb_cb(skb)->time_to_send = q->ktime_cache; + fq_skb_cb(skb)->time_to_send = now; } else { /* Check if packet timestamp is too far in the future. 
*/ - if (fq_packet_beyond_horizon(skb, q)) { + if (fq_packet_beyond_horizon(skb, q, now)) { if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop(skb, sch, to_free); } q->stat_horizon_caps++; - skb->tstamp = q->ktime_cache + q->horizon; + skb->tstamp = now + q->horizon; } fq_skb_cb(skb)->time_to_send = skb->tstamp; } - f = fq_classify(sch, skb); + f = fq_classify(sch, skb, now); if (f != &q->internal) { if (unlikely(f->qlen >= q->flow_plimit)) { @@ -602,7 +604,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) goto out; } - q->ktime_cache = now = ktime_get_ns(); + now = ktime_get_ns(); fq_check_throttled(q, now); begin: head = &q->new_flows; -- cgit v1.2.3 From 5579ee462dfe768297563a6083e21df52c3ad856 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Oct 2023 13:17:36 +0000 Subject: net_sched: export pfifo_fast prio2band[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pfifo_fast prio2band[] is renamed to sch_default_prio2band[] and exported because we want to share it in FQ. Signed-off-by: Eric Dumazet Acked-by: Dave Taht Reviewed-by: Willem de Bruijn Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Paolo Abeni --- include/net/sch_generic.h | 1 + net/sched/sch_generic.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f232512505f8..c7318c73cfd6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -587,6 +587,7 @@ static inline void sch_tree_unlock(struct Qdisc *q) extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; +extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1]; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d7e23f4cc0e..4195a4bc26ca 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -694,9 +694,10 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static const u8 prio2band[TC_PRIO_MAX + 1] = { - 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = { + 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; +EXPORT_SYMBOL(sch_default_prio2band); /* 3-band FIFO queue: old style, but should be a bit faster than generic prio+fifo combination. 
@@ -721,7 +722,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - int band = prio2band[skb->priority & TC_PRIO_MAX]; + int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); unsigned int pkt_len = qdisc_pkt_len(skb); @@ -830,7 +831,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); + memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; -- cgit v1.2.3 From 29f834aa326e659ed354c406056e94ea3d29706a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Oct 2023 13:17:37 +0000 Subject: net_sched: sch_fq: add 3 bands and WRR scheduling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before Google adopted FQ for its production servers, we had to ensure AF4 packets would get a higher share than BE1 ones. As discussed this week in Netconf 2023 in Paris, it is time to upstream this for public use. After this patch FQ can replace pfifo_fast, with the following differences : - FQ uses WRR instead of strict prio, to avoid starvation of low priority packets. - We make sure each band/prio tracks its own usage against sch->limit. This was done to make sure flood of low priority packets would not prevent AF4 packets to be queued. Contributed by Willem. - priomap can be changed, if needed (default value are the ones coming from pfifo_fast). In this patch, we set default band weights so that : - high prio (band=0) packets get 90% of the bandwidth if they compete with low prio (band=2) packets. - high prio packets get 75% of the bandwidth if they compete with medium prio (band=1) packets. Following patch in this series adds the possibility to tune the per-band weights. As we added many fields in 'struct fq_sched_data', we had to make sure to have the first cache line read-mostly, and avoid wasting precious cache lines. More optimizations are possible but will be sent separately. 
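The 90%/75% figures follow directly from the default per-band quanta set
later in this patch (9, 3 and 1 times 64KB for bands 0, 1 and 2). A small
illustrative helper, not kernel code, makes the arithmetic explicit:

	/* WRR share of band A when only bands A and B are backlogged */
	static unsigned int wrr_share_pct(unsigned int quantum_a,
					  unsigned int quantum_b)
	{
		return 100 * quantum_a / (quantum_a + quantum_b);
	}

	/* wrr_share_pct(9 << 16, 1 << 16) == 90: high vs low prio    */
	/* wrr_share_pct(9 << 16, 3 << 16) == 75: high vs medium prio */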
Signed-off-by: Eric Dumazet Acked-by: Dave Taht Reviewed-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_sched.h | 11 ++- net/sched/sch_fq.c | 204 ++++++++++++++++++++++++++++++++--------- 2 files changed, 171 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 579f641846b8..ec5ab44d41a2 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -941,15 +941,19 @@ enum { TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */ + TCA_FQ_PRIOMAP, /* prio2band */ + __TCA_FQ_MAX }; #define TCA_FQ_MAX (__TCA_FQ_MAX - 1) +#define FQ_BANDS 3 + struct tc_fq_qd_stats { __u64 gc_flows; - __u64 highprio_packets; - __u64 tcp_retrans; + __u64 highprio_packets; /* obsolete */ + __u64 tcp_retrans; /* obsolete */ __u64 throttled; __u64 flows_plimit; __u64 pkts_too_long; @@ -963,6 +967,9 @@ struct tc_fq_qd_stats { __u64 horizon_drops; __u64 horizon_caps; __u64 fastpath_packets; + __u64 band_drops[FQ_BANDS]; + __u32 band_pkt_count[FQ_BANDS]; + __u32 pad; }; /* Heavy-Hitter Filter */ diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 818ac786379d..081105801fa6 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -51,7 +51,8 @@ #include struct fq_skb_cb { - u64 time_to_send; + u64 time_to_send; + u8 band; }; static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) @@ -84,32 +85,28 @@ struct fq_flow { u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ -/* Second cache line, used in fq_dequeue() */ +/* Second cache line */ int credit; - /* 32bit hole on 64bit arches */ - + int band; struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; -} ____cacheline_aligned_in_smp; +}; struct fq_flow_head { struct fq_flow *first; struct fq_flow *last; }; -struct fq_sched_data { +struct fq_perband_flows { struct fq_flow_head new_flows; - struct fq_flow_head old_flows; + int credit; + int quantum; /* based on band nr : 576KB, 192KB, 64KB */ +}; - struct rb_root delayed; /* for rate limited flows */ - u64 time_next_delayed_flow; - unsigned long unthrottle_latency_ns; - - struct fq_flow internal; /* for non classified or high prio packets */ - +struct fq_sched_data { /* Read mostly cache line */ u32 quantum; @@ -125,10 +122,21 @@ struct fq_sched_data { u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; + u8 prio2band[(TC_PRIO_MAX + 1) >> 2]; u32 timer_slack; /* hrtimer slack in ns */ /* Read/Write fields. */ + unsigned int band_nr; /* band being serviced in fq_dequeue() */ + + struct fq_perband_flows band_flows[FQ_BANDS]; + + struct fq_flow internal; /* fastpath queue. */ + struct rb_root delayed; /* for rate limited flows */ + u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; + + u32 band_pkt_count[FQ_BANDS]; u32 flows; u32 inactive_flows; /* Flows with no packet to send. */ u32 throttled_flows; @@ -139,7 +147,7 @@ struct fq_sched_data { /* Seldom used fields. 
*/ - u64 stat_internal_packets; /* aka highprio */ + u64 stat_band_drops[FQ_BANDS]; u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; @@ -148,6 +156,12 @@ struct fq_sched_data { u64 stat_allocation_errors; }; +/* return the i-th 2-bit value ("crumb") */ +static u8 fq_prio2band(const u8 *prio2band, unsigned int prio) +{ + return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3; +} + /* * f->tail and f->age share the same location. * We can use the low order bit to differentiate if this location points @@ -172,8 +186,19 @@ static bool fq_flow_is_throttled(const struct fq_flow *f) return f->next == &throttled; } -static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +enum new_flow { + NEW_FLOW, + OLD_FLOW +}; + +static void fq_flow_add_tail(struct fq_sched_data *q, struct fq_flow *flow, + enum new_flow list_sel) { + struct fq_perband_flows *pband = &q->band_flows[flow->band]; + struct fq_flow_head *head = (list_sel == NEW_FLOW) ? + &pband->new_flows : + &pband->old_flows; + if (head->first) head->last->next = flow; else @@ -186,7 +211,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) { rb_erase(&f->rate_node, &q->delayed); q->throttled_flows--; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); } static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) @@ -326,11 +351,6 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, struct rb_root *root; struct fq_flow *f; - /* warning: no starvation prevention... */ - if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) { - q->stat_internal_packets++; /* highprio packet */ - return &q->internal; - } /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket * or a listener (SYNCOOKIE mode) * 1) request sockets are not full blown, @@ -509,9 +529,13 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; u64 now; + u8 band; - if (unlikely(sch->q.qlen >= sch->limit)) + band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); + if (unlikely(q->band_pkt_count[band] >= sch->limit)) { + q->stat_band_drops[band]++; return qdisc_drop(skb, sch, to_free); + } now = ktime_get_ns(); if (!skb->tstamp) { @@ -538,11 +562,14 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (fq_flow_is_detached(f)) { - fq_flow_add_tail(&q->new_flows, f); + fq_flow_add_tail(q, f, NEW_FLOW); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); } + f->band = band; + q->band_pkt_count[band]++; + fq_skb_cb(skb)->band = band; if (f->qlen == 0) q->inactive_flows--; } @@ -584,13 +611,26 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) } } +static struct fq_flow_head *fq_pband_head_select(struct fq_perband_flows *pband) +{ + if (pband->credit <= 0) + return NULL; + + if (pband->new_flows.first) + return &pband->new_flows; + + return pband->old_flows.first ? 
&pband->old_flows : NULL; +} + static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); + struct fq_perband_flows *pband; struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; unsigned long rate; + int retry; u32 plen; u64 now; @@ -606,24 +646,31 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) now = ktime_get_ns(); fq_check_throttled(q, now); + retry = 0; + pband = &q->band_flows[q->band_nr]; begin: - head = &q->new_flows; - if (!head->first) { - head = &q->old_flows; - if (!head->first) { - if (q->time_next_delayed_flow != ~0ULL) - qdisc_watchdog_schedule_range_ns(&q->watchdog, + head = fq_pband_head_select(pband); + if (!head) { + while (++retry < FQ_BANDS) { + if (++q->band_nr == FQ_BANDS) + q->band_nr = 0; + pband = &q->band_flows[q->band_nr]; + pband->credit = min(pband->credit + pband->quantum, + pband->quantum); + goto begin; + } + if (q->time_next_delayed_flow != ~0ULL) + qdisc_watchdog_schedule_range_ns(&q->watchdog, q->time_next_delayed_flow, q->timer_slack); - return NULL; - } + return NULL; } f = head->first; - + retry = 0; if (f->credit <= 0) { f->credit += q->quantum; head->first = f->next; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); goto begin; } @@ -645,12 +692,13 @@ begin: } if (--f->qlen == 0) q->inactive_flows++; + q->band_pkt_count[fq_skb_cb(skb)->band]--; fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ - if ((head == &q->new_flows) && q->old_flows.first) { - fq_flow_add_tail(&q->old_flows, f); + if (head == &pband->new_flows) { + fq_flow_add_tail(q, f, OLD_FLOW); } else { fq_flow_set_detached(f); } @@ -658,6 +706,7 @@ begin: } plen = qdisc_pkt_len(skb); f->credit -= plen; + pband->credit -= plen; if (!q->rate_enable) goto out; @@ -749,8 +798,10 @@ static void fq_reset(struct Qdisc *sch) kmem_cache_free(fq_flow_cachep, f); } } - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (idx = 0; idx < FQ_BANDS; idx++) { + q->band_flows[idx].new_flows.first = NULL; + q->band_flows[idx].old_flows.first = NULL; + } q->delayed = RB_ROOT; q->flows = 0; q->inactive_flows = 0; @@ -864,8 +915,54 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, + [TCA_FQ_PRIOMAP] = { + .type = NLA_BINARY, + .len = sizeof(struct tc_prio_qopt), + }, }; +/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ +static void fq_prio2band_compress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + memset(out, 0, num_elems / 4); + for (i = 0; i < num_elems; i++) + out[i / 4] |= in[i] << (2 * (i & 0x3)); +} + +static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + for (i = 0; i < num_elems; i++) + out[i] = fq_prio2band(in, i); +} + +static int fq_load_priomap(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const struct tc_prio_qopt *map = nla_data(attr); + int i; + + if (map->bands != FQ_BANDS) { + NL_SET_ERR_MSG_MOD(extack, "FQ only supports 3 bands"); + return -EINVAL; + } + for (i = 0; i < TC_PRIO_MAX + 1; i++) { + if (map->priomap[i] >= FQ_BANDS) { + NL_SET_ERR_MSG_FMT_MOD(extack, "FQ priomap field %d maps to a too high band %d", + i, map->priomap[i]); + return -EINVAL; + } + } + fq_prio2band_compress_crumb(map->priomap, 
q->prio2band); + return 0; +} + static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -940,6 +1037,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (!err && tb[TCA_FQ_PRIOMAP]) + err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack); + if (tb[TCA_FQ_ORPHAN_MASK]) q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); @@ -991,7 +1091,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); - int err; + int i, err; sch->limit = 10000; q->flow_plimit = 100; @@ -1001,8 +1101,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->flow_max_rate = ~0UL; q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (i = 0; i < FQ_BANDS; i++) { + q->band_flows[i].new_flows.first = NULL; + q->band_flows[i].old_flows.first = NULL; + } + q->band_flows[0].quantum = 9 << 16; + q->band_flows[1].quantum = 3 << 16; + q->band_flows[2].quantum = 1 << 16; q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); @@ -1017,6 +1122,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; + fq_prio2band_compress_crumb(sch_default_prio2band, q->prio2band); qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) @@ -1031,6 +1137,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); u64 ce_threshold = q->ce_threshold; + struct tc_prio_qopt prio = { + .bands = FQ_BANDS, + }; u64 horizon = q->horizon; struct nlattr *opts; @@ -1062,6 +1171,10 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) goto nla_put_failure; + fq_prio2band_decompress_crumb(q->prio2band, prio.priomap); + if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -1072,11 +1185,14 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); struct tc_fq_qd_stats st; + int i; + + st.pad = 0; sch_tree_lock(sch); st.gc_flows = q->stat_gc_flows; - st.highprio_packets = q->stat_internal_packets; + st.highprio_packets = 0; st.fastpath_packets = q->internal.stat_fastpath_packets; st.tcp_retrans = 0; st.throttled = q->stat_throttled; @@ -1093,6 +1209,10 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.ce_mark = q->stat_ce_mark; st.horizon_drops = q->stat_horizon_drops; st.horizon_caps = q->stat_horizon_caps; + for (i = 0; i < FQ_BANDS; i++) { + st.band_drops[i] = q->stat_band_drops[i]; + st.band_pkt_count[i] = q->band_pkt_count[i]; + } sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); @@ -1120,7 +1240,7 @@ static int __init fq_module_init(void) fq_flow_cachep = kmem_cache_create("fq_flow_cache", sizeof(struct fq_flow), - 0, 0, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!fq_flow_cachep) return -ENOMEM; -- cgit v1.2.3 From 49e7265fd098fdade2bbdd9331e6b914cda7fa83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Oct 2023 13:17:38 +0000 Subject: net_sched: sch_fq: add TCA_FQ_WEIGHTS attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This attribute can be used to tune the per band weight and report them in "tc qdisc show" output: 
qdisc fq 802f: parent 1:9 limit 100000p flow_limit 500p buckets 1024 orphan_mask 1023 quantum 8364b initial_quantum 41820b low_rate_threshold 550Kbit refill_delay 40ms timer_slack 10us horizon 10s horizon_drop bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 weights 589824 196608 65536 Sent 236460814 bytes 792991 pkt (dropped 0, overlimits 0 requeues 0) rate 25816bit 10pps backlog 0b 0p requeues 0 flows 4 (inactive 4 throttled 0) gc 0 throttled 19 latency 17.6us fastpath 773882 Signed-off-by: Eric Dumazet Acked-by: Dave Taht Reviewed-by: Willem de Bruijn Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_sched.h | 3 +++ net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ec5ab44d41a2..f762a10bfb78 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -943,12 +943,15 @@ enum { TCA_FQ_PRIOMAP, /* prio2band */ + TCA_FQ_WEIGHTS, /* Weights for each band */ + __TCA_FQ_MAX }; #define TCA_FQ_MAX (__TCA_FQ_MAX - 1) #define FQ_BANDS 3 +#define FQ_MIN_WEIGHT 16384 struct tc_fq_qd_stats { __u64 gc_flows; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 081105801fa6..8eacdb54e72f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -919,6 +919,10 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { .type = NLA_BINARY, .len = sizeof(struct tc_prio_qopt), }, + [TCA_FQ_WEIGHTS] = { + .type = NLA_BINARY, + .len = FQ_BANDS * sizeof(s32), + }, }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ @@ -941,6 +945,25 @@ static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) out[i] = fq_prio2band(in, i); } +static int fq_load_weights(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + s32 *weights = nla_data(attr); + int i; + + for (i = 0; i < FQ_BANDS; i++) { + if (weights[i] < FQ_MIN_WEIGHT) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Weight %d less that minimum allowed %d", + weights[i], FQ_MIN_WEIGHT); + return -EINVAL; + } + } + for (i = 0; i < FQ_BANDS; i++) + q->band_flows[i].quantum = weights[i]; + return 0; +} + static int fq_load_priomap(struct fq_sched_data *q, const struct nlattr *attr, struct netlink_ext_ack *extack) @@ -1040,6 +1063,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (!err && tb[TCA_FQ_PRIOMAP]) err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack); + if (!err && tb[TCA_FQ_WEIGHTS]) + err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack); + if (tb[TCA_FQ_ORPHAN_MASK]) q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); @@ -1142,6 +1168,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) }; u64 horizon = q->horizon; struct nlattr *opts; + s32 weights[3]; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) @@ -1175,6 +1202,12 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) goto nla_put_failure; + weights[0] = q->band_flows[0].quantum; + weights[1] = q->band_flows[1].quantum; + weights[2] = q->band_flows[2].quantum; + if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights)) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: -- cgit v1.2.3 From 0fef0907d6faaab280d052e385d0da876182a1d2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:18:23 -0700 Subject: netem: Annotate struct disttable with __counted_by Prepare for the coming 
implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct disttable. Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Stephen Hemminger Link: https://lore.kernel.org/r/20231003231823.work.684-kees@kernel.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4ad39a4a3cf5..6ba2dc191ed9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -67,7 +67,7 @@ struct disttable { u32 size; - s16 table[]; + s16 table[] __counted_by(size); }; struct netem_sched_data { -- cgit v1.2.3 From eaede99c3aeb38613c40a150f676f772faf2b42b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:21:02 -0700 Subject: netlink: Annotate struct netlink_policy_dump_state with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct netlink_policy_dump_state. Additionally update the size of the usage array length before accessing it. This requires remembering the old size for the memset() and later assignments. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Johannes Berg Cc: netdev@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/netlink/policy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 87e3de0fde89..e2f111edf66c 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -21,7 +21,7 @@ struct netlink_policy_dump_state { struct { const struct nla_policy *policy; unsigned int maxtype; - } policies[]; + } policies[] __counted_by(n_alloc); }; static int add_policy(struct netlink_policy_dump_state **statep, @@ -29,7 +29,7 @@ static int add_policy(struct netlink_policy_dump_state **statep, unsigned int maxtype) { struct netlink_policy_dump_state *state = *statep; - unsigned int n_alloc, i; + unsigned int old_n_alloc, n_alloc, i; if (!policy || !maxtype) return 0; @@ -52,12 +52,13 @@ static int add_policy(struct netlink_policy_dump_state **statep, if (!state) return -ENOMEM; - memset(&state->policies[state->n_alloc], 0, - flex_array_size(state, policies, n_alloc - state->n_alloc)); - - state->policies[state->n_alloc].policy = policy; - state->policies[state->n_alloc].maxtype = maxtype; + old_n_alloc = state->n_alloc; state->n_alloc = n_alloc; + memset(&state->policies[old_n_alloc], 0, + flex_array_size(state, policies, n_alloc - old_n_alloc)); + + state->policies[old_n_alloc].policy = policy; + state->policies[old_n_alloc].maxtype = maxtype; *statep = state; return 0; -- cgit v1.2.3 From b3783e5efde4201b2cc7a2fee41791b413137f4c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:17:41 -0700 Subject: net/packet: Annotate struct packet_fanout with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct packet_fanout. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Willem de Bruijn Cc: Anqi Shen Cc: netdev@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/internal.h b/net/packet/internal.h index 63f4865202c1..d29c94c45159 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -94,7 +94,7 @@ struct packet_fanout { spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; - struct sock __rcu *arr[]; + struct sock __rcu *arr[] __counted_by(max_num_members); }; struct packet_rollover { -- cgit v1.2.3 From 48533eca606efcf63ef4080ded5618e0b17ee3d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2023 11:45:04 +0000 Subject: net: sock_dequeue_err_skb() optimization Exit early if the list is empty. Some applications using TCP zerocopy are calling recvmsg( ... MSG_ERRQUEUE) and hit this case quite often, probably because busy polling only deals with sk_receive_queue. 
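The pattern that benefits is the usual zerocopy completion reaping loop; a
user-space sketch (fd is assumed to be a connected TCP socket with
SO_ZEROCOPY enabled, error handling and cmsg parsing trimmed):

	struct msghdr msg = { 0 };
	char control[128];

	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);

	/* Reap zerocopy completions. With an empty error queue this now
	 * returns -1/EAGAIN before the kernel touches the queue lock.
	 */
	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0 && errno == EAGAIN)
		/* nothing pending yet, poll again later */;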
Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231005114504.642589-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da3f96bdd6f6..0401f40973a5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5162,6 +5162,9 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) bool icmp_next = false; unsigned long flags; + if (skb_queue_empty_lockless(q)) + return NULL; + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) { -- cgit v1.2.3 From dab4e1f06cabb6834de14264394ccab197007302 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Sat, 7 Oct 2023 10:14:14 +0200 Subject: bpf: Derive source IP addr via bpf_*_fib_lookup() Extend the bpf_fib_lookup() helper by making it to return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. For example, the following snippet can be used to derive the desired source IP address: struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; ret = bpf_skb_fib_lookup(skb, p, sizeof(p), BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); if (ret != BPF_FIB_LKUP_RET_SUCCESS) return TC_ACT_SHOT; /* the p.ipv4_src now contains the source address */ The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts. For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which the Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and private addresses are attached to the same egress interface. The change was tested with Cilium [1]. Nikolay Aleksandrov helped to figure out the IPv6 addr selection. [1]: https://github.com/cilium/cilium/pull/28283 Signed-off-by: Martynas Pumputis Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt Signed-off-by: Martin KaFai Lau --- include/net/ipv6_stubs.h | 5 +++++ include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 18 +++++++++++++++++- net/ipv6/af_inet6.c | 1 + tools/include/uapi/linux/bpf.h | 10 ++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a7d4a1a69f21..e0aa457f94a9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3264,6 +3264,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. 
If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6964,6 +6969,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6976,6 +6982,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..3880bf0b740d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6010,7 +6025,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6ad0d6e99b5..6337fb4504fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a7d4a1a69f21..e0aa457f94a9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3264,6 +3264,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. 
@@ -6964,6 +6969,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6976,6 +6982,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7010,6 +7017,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ -- cgit v1.2.3 From 95b9a87c6a6b708cccda1f9b7baf9920b80cdabf Mon Sep 17 00:00:00 2001 From: David Morley Date: Fri, 6 Oct 2023 01:18:40 +0000 Subject: tcp: record last received ipv6 flowlabel In order to better estimate whether a data packet has been retransmitted or is the result of a TLP, we save the last received ipv6 flowlabel. To make space for this field we resize the "ato" field in inet_connection_sock as the current value of TCP_DELACK_MAX can be fully contained in 8 bits and add a compile_time_assert ensuring this field is the required size. v2: addressed kernel bot feedback about dccp_delack_timer() v3: addressed build error introduced by commit bbf80d713fe7 ("tcp: derive delack_max from rto_min") Signed-off-by: David Morley Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Tested-by: David Morley Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/inet_connection_sock.h | 5 ++++- include/net/tcp.h | 2 ++ net/dccp/timer.c | 4 ++-- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 15 +++++++++++++++ net/ipv4/tcp_timer.c | 2 +- 6 files changed, 26 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..d6d9d1c1985a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -114,7 +114,10 @@ struct inet_connection_sock { __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ - __u32 ato; /* Predicted tick of soft clock */ + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ + unused:4; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb0a2855311..7fdedf5c71f0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); + #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b3255e87cc7e..a4cfb47b60e5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. 
*/ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9a8b134d8ada..faabb5a4a378 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); - info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, - tcp_delack_max(sk))); + info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, + tcp_delack_max(sk))); info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4b8f2e74d71d..2ae4b211c125 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -778,6 +778,16 @@ new_measure: tp->rcvq_space.time = tp->tcp_mstamp; } +static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet_connection_sock *icsk = inet_csk(sk); + + if (skb->protocol == htons(ETH_P_IPV6)) + icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); +#endif +} + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The @@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } } icsk->icsk_ack.lrcvtime = now; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); @@ -4519,6 +4530,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + + /* Save last flowlabel after a spurious retrans. */ + tcp_save_lrcv_flowlabel(sk, skb); } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) @@ -4835,6 +4849,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) u32 seq, end_seq; bool fragstolen; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3f61c6a70a1f..0862b73dd3b5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. -- cgit v1.2.3 From 939463016b7a869d8b407cfcda4d6545de399698 Mon Sep 17 00:00:00 2001 From: David Morley Date: Fri, 6 Oct 2023 01:18:41 +0000 Subject: tcp: change data receiver flowlabel after one dup This commit changes the data receiver repath behavior to occur after receiving a single duplicate. This can help recover ACK connectivity quicker if a TLP was sent along a nonworking path. For instance, consider the case where we have an initially nonworking forward path and reverse path and subsequently switch to only working forward paths. Before this patch we would have the following behavior. 
+---------+--------+--------+----------+----------+----------+ | Event | For FL | Rev FL | FP Works | RP Works | Data Del | +---------+--------+--------+----------+----------+----------+ | Initial | A | 1 | N | N | 0 | +---------+--------+--------+----------+----------+----------+ | TLP | A | 1 | N | N | 0 | +---------+--------+--------+----------+----------+----------+ | RTO 1 | B | 1 | Y | N | 1 | +---------+--------+--------+----------+----------+----------+ | RTO 2 | C | 1 | Y | N | 2 | +---------+--------+--------+----------+----------+----------+ | RTO 3 | D | 2 | Y | Y | 3 | +---------+--------+--------+----------+----------+----------+ This patch gets rid of at least RTO 3, avoiding additional unnecessary repaths of a working forward path to a (potentially) nonworking one. In addition, this commit changes the behavior to avoid repathing upon rx of duplicate data if the local endpoint is in CA_Loss (in which case the RTOs will already be changing the outgoing flowlabel). Signed-off-by: David Morley Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Tested-by: David Morley Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/tcp_input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2ae4b211c125..ab87f0285b72 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4524,15 +4524,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) { /* When the ACK path fails or drops most ACKs, the sender would * timeout and spuriously retransmit the same segment repeatedly. - * The receiver remembers and reflects via DSACKs. Leverage the - * DSACK state and change the txhash to re-route speculatively. + * If it seems our ACKs are not reaching the other side, + * based on receiving a duplicate data segment with new flowlabel + * (suggesting the sender suffered an RTO), and we are not already + * repathing due to our own RTO, then rehash the socket to repath our + * packets. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && +#if IS_ENABLED(CONFIG_IPV6) + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss && + skb->protocol == htons(ETH_P_IPV6) && + (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel != + ntohl(ip6_flowlabel(ipv6_hdr(skb)))) && sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); /* Save last flowlabel after a spurious retrans. */ tcp_save_lrcv_flowlabel(sk, skb); +#endif } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) -- cgit v1.2.3 From afed2b54c5403393986c3b3555152dfd4ab7998a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Sep 2023 21:19:18 +0200 Subject: netfilter: nf_tables: Always allocate nft_rule_dump_ctx It will move into struct netlink_callback's scratch area later, just put nf_tables_dump_rules_start in shape to reduce churn later. 
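The scratch area mentioned here is the small per-dump buffer embedded in
struct netlink_callback, roughly (trimmed from include/linux/netlink.h):

	struct netlink_callback {
		/* ... other fields ... */
		union {
			u8	ctx[48];
			/* args is deprecated. Cast a struct over ctx
			 * instead for proper type safety.
			 */
			long	args[6];
		};
	};

Later patches in this series cast struct nft_rule_dump_ctx over cb->ctx so
the per-dump state no longer needs a separate allocation.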
Suggested-by: Pablo Neira Ayuso Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 48 +++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b4405db710b0..ea30bee41a6e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3521,10 +3521,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (family != NFPROTO_UNSPEC && family != table->family) continue; - if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) + if (ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->table && ctx->chain) { + if (ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, @@ -3548,7 +3548,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, goto done; } - if (ctx && ctx->table) + if (ctx->table) break; } done: @@ -3563,27 +3563,23 @@ static int nf_tables_dump_rules_start(struct netlink_callback *cb) const struct nlattr * const *nla = cb->data; struct nft_rule_dump_ctx *ctx = NULL; - if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) + return -ENOMEM; - if (nla[NFTA_RULE_TABLE]) { - ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], - GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); - return -ENOMEM; - } + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC); + if (!ctx->table) { + kfree(ctx); + return -ENOMEM; } - if (nla[NFTA_RULE_CHAIN]) { - ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], - GFP_ATOMIC); - if (!ctx->chain) { - kfree(ctx->table); - kfree(ctx); - return -ENOMEM; - } + } + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC); + if (!ctx->chain) { + kfree(ctx->table); + kfree(ctx); + return -ENOMEM; } } @@ -3595,11 +3591,9 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = cb->data; - if (ctx) { - kfree(ctx->table); - kfree(ctx->chain); - kfree(ctx); - } + kfree(ctx->table); + kfree(ctx->chain); + kfree(ctx); return 0; } -- cgit v1.2.3 From 30fa41a0f6df4c85790cc6499ddc4a926a113bfa Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Sep 2023 21:19:19 +0200 Subject: netfilter: nf_tables: Drop pointless memset when dumping rules None of the dump callbacks uses netlink_callback::args beyond the first element, no need to zero the data. 
Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ea30bee41a6e..cd3c7dd15530 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3465,10 +3465,6 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, goto cont_skip; if (*idx < s_idx) goto cont; - if (*idx > s_idx) { - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - } if (prule) handle = prule->handle; else -- cgit v1.2.3 From 405c8fd62d612dd0e1d5ca59903449616453a56d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Sep 2023 21:19:20 +0200 Subject: netfilter: nf_tables: Carry reset flag in nft_rule_dump_ctx This relieves the dump callback from having to check nlmsg_type upon each call and instead performs the check once in .start callback. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cd3c7dd15530..567c414351da 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3443,15 +3443,16 @@ static void audit_log_rule_reset(const struct nft_table *table, struct nft_rule_dump_ctx { char *table; char *chain; + bool reset; }; static int __nf_tables_dump_rules(struct sk_buff *skb, unsigned int *idx, struct netlink_callback *cb, const struct nft_table *table, - const struct nft_chain *chain, - bool reset) + const struct nft_chain *chain) { + struct nft_rule_dump_ctx *ctx = cb->data; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; @@ -3475,7 +3476,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle, reset) < 0) { + table, chain, rule, handle, ctx->reset) < 0) { ret = 1; break; } @@ -3487,7 +3488,7 @@ cont_skip: (*idx)++; } - if (reset && entries) + if (ctx->reset && entries) audit_log_rule_reset(table, cb->seq, entries); return ret; @@ -3504,10 +3505,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; - bool reset = false; - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; rcu_read_lock(); nft_net = nft_pernet(net); @@ -3532,7 +3529,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (!nft_is_active(net, chain)) continue; __nf_tables_dump_rules(skb, &idx, - cb, table, chain, reset); + cb, table, chain); break; } goto done; @@ -3540,7 +3537,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(chain, &table->chains, list) { if (__nf_tables_dump_rules(skb, &idx, - cb, table, chain, reset)) + cb, table, chain)) goto done; } @@ -3578,6 +3575,8 @@ static int nf_tables_dump_rules_start(struct netlink_callback *cb) return -ENOMEM; } } + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + ctx->reset = true; cb->data = ctx; return 0; -- cgit v1.2.3 From 8194d599bc01bc6e89b14af436803cf90d0a8650 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Sep 2023 21:19:21 +0200 Subject: netfilter: nf_tables: Carry s_idx in nft_rule_dump_ctx In order to move the context into struct 
netlink_callback's scratch area, the latter must be unused first. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 567c414351da..a2e6c826bd08 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3441,6 +3441,7 @@ static void audit_log_rule_reset(const struct nft_table *table, } struct nft_rule_dump_ctx { + unsigned int s_idx; char *table; char *chain; bool reset; @@ -3455,7 +3456,6 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct nft_rule_dump_ctx *ctx = cb->data; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; - unsigned int s_idx = cb->args[0]; unsigned int entries = 0; int ret = 0; u64 handle; @@ -3464,7 +3464,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont_skip; - if (*idx < s_idx) + if (*idx < ctx->s_idx) goto cont; if (prule) handle = prule->handle; @@ -3498,7 +3498,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = cb->data; struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0; @@ -3547,7 +3547,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, done: rcu_read_unlock(); - cb->args[0] = idx; + ctx->s_idx = idx; return skb->len; } -- cgit v1.2.3 From 99ab9f84b85ec3eec099278bff61269ad0b078ce Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 29 Sep 2023 21:19:22 +0200 Subject: netfilter: nf_tables: Don't allocate nft_rule_dump_ctx Since struct netlink_callback::args is not used by rule dumpers anymore, use it to hold nft_rule_dump_ctx. Add a build-time check to make sure it won't ever exceed the available space. 
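For context, the scratch area being targeted is the fixed-size buffer embedded in struct netlink_callback, roughly (paraphrased here only as a sketch, not an authoritative definition):

	struct netlink_callback {
		...
		union {
			u8	ctx[48];	/* preferred: overlay a private context struct */
			long	args[6];	/* legacy per-dump cursor values */
		};
	};

With the previous two patches the rule dump context is down to an index, two string pointers and a bool, which fits within that buffer on the usual architectures; the BUILD_BUG_ON() added here turns that assumption into a compile-time check.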
Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a2e6c826bd08..68321345bb6d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3453,7 +3453,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, const struct nft_table *table, const struct nft_chain *chain) { - struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; unsigned int entries = 0; @@ -3498,7 +3498,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0; @@ -3553,42 +3553,35 @@ done: static int nf_tables_dump_rules_start(struct netlink_callback *cb) { + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; - struct nft_rule_dump_ctx *ctx = NULL; - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); if (nla[NFTA_RULE_TABLE]) { ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); + if (!ctx->table) return -ENOMEM; - } } if (nla[NFTA_RULE_CHAIN]) { ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC); if (!ctx->chain) { kfree(ctx->table); - kfree(ctx); return -ENOMEM; } } if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) ctx->reset = true; - cb->data = ctx; return 0; } static int nf_tables_dump_rules_done(struct netlink_callback *cb) { - struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; kfree(ctx->table); kfree(ctx->chain); - kfree(ctx); return 0; } -- cgit v1.2.3 From 8a23f4ab92f9b7f258ca28cce2e34b80f56ab9d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 6 Oct 2023 11:27:29 +0200 Subject: netfilter: conntrack: simplify nf_conntrack_alter_reply nf_conntrack_alter_reply doesn't do helper reassignment anymore. Remove the comments that make this claim. Furthermore, remove dead code from the function and place ot in nf_conntrack.h. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack.h | 14 ++++++++++---- net/netfilter/nf_conntrack_core.c | 18 ------------------ net/netfilter/nf_conntrack_helper.c | 7 +------ 3 files changed, 11 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4085765c3370..cba3ccf03fcc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct) return read_pnet(&ct->ct_net); } -/* Alter reply tuple (maybe alter helper). */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply); - /* Is this tuple taken? (ignoring any belonging to the given conntrack). 
*/ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, @@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +static inline void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply) +{ + /* Must be unconfirmed, so not in hash table yet */ + if (WARN_ON(nf_ct_is_confirmed(ct))) + return; + + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; +} + #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9f6f2e643575..124136b5a79a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2042,24 +2042,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_in); -/* Alter reply tuple (maybe alter helper). This is for NAT, and is - implicitly racy: see __nf_conntrack_confirm */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply) -{ - struct nf_conn_help *help = nfct_help(ct); - - /* Should be unconfirmed, so not in hash table yet */ - WARN_ON(nf_ct_is_confirmed(ct)); - - nf_ct_dump_tuple(newreply); - - ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (ct->master || (help && !hlist_empty(&help->expectations))) - return; -} -EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); - /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f22691f83853..4ed5878cb25b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -194,12 +194,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; - /* We already got a helper explicitly attached. The function - * nf_conntrack_alter_reply - in case NAT is in use - asks for looking - * the helper up again. Since now the user is in full control of - * making consistent helper configurations, skip this automatic - * re-lookup, otherwise we'll lose the helper. - */ + /* We already got a helper explicitly attached (e.g. nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; -- cgit v1.2.3 From 6ac9c51eebe8209f58fd71f51c856184136b8613 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 6 Oct 2023 11:28:47 +0200 Subject: netfilter: conntrack: prefer tcp_error_log to pr_debug pr_debug doesn't provide any information other than that a packet did not match existing state but also was found to not create a new connection. Replaces this with tcp_error_log, which will also dump packets' content so one can see if this is a stray FIN or RST. 
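For reference, tcp_error_log() is a thin wrapper around the shared invalid-packet logging helper, roughly:

	#define tcp_error_log(skb, state, fmt, ...)				\
		nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, fmt, ##__VA_ARGS__)

so the message goes through the conntrack invalid-packet logging path rather than pr_debug()'s dynamic-debug path. Output only appears when that logging is enabled, e.g. by setting the net.netfilter.nf_conntrack_log_invalid sysctl to 6 (TCP) or 255 (all protocols), with a netfilter logging backend available to emit the packet dump.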
Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_proto_tcp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4018acb1d674..e573be5afde7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -835,7 +835,8 @@ static bool tcp_error(const struct tcphdr *th, static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *th) + const struct tcphdr *th, + const struct nf_hook_state *state) { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); @@ -846,7 +847,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { - pr_debug("nf_ct_tcp: invalid new deleting.\n"); + tcp_error_log(skb, state, "invalid new"); return false; } @@ -980,7 +981,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (tcp_error(th, skb, dataoff, state)) return -NF_ACCEPT; - if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th)) + if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state)) return -NF_ACCEPT; spin_lock_bh(&ct->lock); -- cgit v1.2.3 From 1dab47139e6118a420acec8426a860ea4b40c379 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Oct 2023 16:10:28 +0200 Subject: appletalk: remove ipddp driver After the cops driver is removed, ipddp is now the only CONFIG_DEV_APPLETALK but as far as I can tell, this also has no users and can be removed, making appletalk support purely based on ethertalk, using ethernet hardware. Link: https://lore.kernel.org/netdev/e490dd0c-a65d-4acf-89c6-c06cb48ec880@app.fastmail.com/ Link: https://lore.kernel.org/netdev/9cac4fbd-9557-b0b8-54fa-93f0290a6fb8@schmorgal.com/ Cc: Doug Brown Signed-off-by: Arnd Bergmann Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20231009141139.1766345-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/index.rst | 1 - Documentation/networking/ipddp.rst | 78 --------- MAINTAINERS | 1 - drivers/net/Makefile | 1 - drivers/net/appletalk/Kconfig | 72 -------- drivers/net/appletalk/Makefile | 6 - drivers/net/appletalk/ipddp.c | 345 ------------------------------------- drivers/net/appletalk/ipddp.h | 28 --- net/Kconfig | 2 +- net/appletalk/Kconfig | 30 ++++ 10 files changed, 31 insertions(+), 533 deletions(-) delete mode 100644 Documentation/networking/ipddp.rst delete mode 100644 drivers/net/appletalk/Kconfig delete mode 100644 drivers/net/appletalk/Makefile delete mode 100644 drivers/net/appletalk/ipddp.c delete mode 100644 drivers/net/appletalk/ipddp.h create mode 100644 net/appletalk/Kconfig (limited to 'net') diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5b75c3f7a137..2ffc5ad10295 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -59,7 +59,6 @@ Contents: gtp ila ioam6-sysctl - ipddp ip_dynaddr ipsec ip-sysctl diff --git a/Documentation/networking/ipddp.rst b/Documentation/networking/ipddp.rst deleted file mode 100644 index be7091b77927..000000000000 --- a/Documentation/networking/ipddp.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. 
SPDX-License-Identifier: GPL-2.0 - -========================================================= -AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation -========================================================= - -Documentation ipddp.c - -This file is written by Jay Schulist - -Introduction ------------- - -AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk -networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams -inside AppleTalk packets. - -Through this driver you can either allow your Linux box to communicate -IP over an AppleTalk network or you can provide IP gatewaying functions -for your AppleTalk users. - -You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk, -EtherTalk and PPPTalk. The only limit on the protocol is that of what -kernel AppleTalk layer and drivers are available. - -Each mode requires its own user space software. - -Compiling AppleTalk-IP Decapsulation/Encapsulation -================================================== - -AppleTalk-IP decapsulation needs to be compiled into your kernel. You -will need to turn on AppleTalk-IP driver support. Then you will need to -select ONE of the two options; IP to AppleTalk-IP encapsulation support or -AppleTalk-IP to IP decapsulation support. If you compile the driver -statically you will only be able to use the driver for the function you have -enabled in the kernel. If you compile the driver as a module you can -select what mode you want it to run in via a module loading param. -ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for -AppleTalk-IP to IP decapsulation. - -Basic instructions for user space tools -======================================= - -I will briefly describe the operation of the tools, but you will -need to consult the supporting documentation for each set of tools. - -Decapsulation - You will need to download a software package called -MacGate. In this distribution there will be a tool called MacRoute -which enables you to add routes to the kernel for your Macs by hand. -Also the tool MacRegGateWay is included to register the -proper IP Gateway and IP addresses for your machine. Included in this -distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from -ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional -but it allows automatic adding and deleting of routes for Macs. (Handy -for locations with large Mac installations) - -Encapsulation - You will need to download a software daemon called ipddpd. -This software expects there to be an AppleTalk-IP gateway on the network. -You will also need to add the proper routes to route your Linux box's IP -traffic out the ipddp interface. - -Common Uses of ipddp.c ----------------------- -Of course AppleTalk-IP decapsulation and encapsulation, but specifically -decapsulation is being used most for connecting LocalTalk networks to -IP networks. Although it has been used on EtherTalk networks to allow -Macs that are only able to tunnel IP over EtherTalk. - -Encapsulation has been used to allow a Linux box stuck on a LocalTalk -network to use IP. It should work equally well if you are stuck on an -EtherTalk only network. - -Further Assistance -------------------- -You can contact me (Jay Schulist ) with any -questions regarding decapsulation or encapsulation. Bradford W. Johnson - originally wrote the ipddp.c driver for IP -encapsulation in AppleTalk. 
diff --git a/MAINTAINERS b/MAINTAINERS index 1bd96045beb8..698ebbd78075 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1459,7 +1459,6 @@ F: drivers/hwmon/applesmc.c APPLETALK NETWORK LAYER L: netdev@vger.kernel.org S: Odd fixes -F: drivers/net/appletalk/ F: include/linux/atalk.h F: include/uapi/linux/atalk.h F: net/appletalk/ diff --git a/drivers/net/Makefile b/drivers/net/Makefile index e26f98f897c5..8a83db32509d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_MHI_NET) += mhi_net.o # Networking Drivers # obj-$(CONFIG_ARCNET) += arcnet/ -obj-$(CONFIG_DEV_APPLETALK) += appletalk/ obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_CAN) += can/ obj-$(CONFIG_NET_DSA) += dsa/ diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig deleted file mode 100644 index b94f731e4576..000000000000 --- a/drivers/net/appletalk/Kconfig +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Appletalk driver configuration -# -config ATALK - tristate "Appletalk protocol support" - select LLC - help - AppleTalk is the protocol that Apple computers can use to communicate - on a network. If your Linux box is connected to such a network and you - wish to connect to it, say Y. You will need to use the netatalk package - so that your Linux box can act as a print and file server for Macs as - well as access AppleTalk printers. Check out - on the WWW for details. - EtherTalk is the name used for AppleTalk over Ethernet and the - cheaper and slower LocalTalk is AppleTalk over a proprietary Apple - network using serial links. EtherTalk and LocalTalk are fully - supported by Linux. - - General information about how to connect Linux, Windows machines and - Macs is on the WWW at . The - NET3-4-HOWTO, available from - , contains valuable - information as well. - - To compile this driver as a module, choose M here: the module will be - called appletalk. You almost certainly want to compile it as a - module so you can restart your AppleTalk stack without rebooting - your machine. I hear that the GNU boycott of Apple is over, so - even politically correct people are allowed to say Y here. - -config DEV_APPLETALK - tristate "Appletalk interfaces support" - depends on ATALK - help - AppleTalk is the protocol that Apple computers can use to communicate - on a network. If your Linux box is connected to such a network, and wish - to do IP over it, or you have a LocalTalk card and wish to use it to - connect to the AppleTalk network, say Y. - -config IPDDP - tristate "Appletalk-IP driver support" - depends on DEV_APPLETALK && ATALK - help - This allows IP networking for users who only have AppleTalk - networking available. This feature is experimental. With this - driver, you can encapsulate IP inside AppleTalk (e.g. if your Linux - box is stuck on an AppleTalk only network) or decapsulate (e.g. if - you want your Linux box to act as an Internet gateway for a zoo of - AppleTalk connected Macs). Please see the file - for more information. - - If you say Y here, the AppleTalk-IP support will be compiled into - the kernel. In this case, you can either use encapsulation or - decapsulation, but not both. With the following two questions, you - decide which one you want. - - To compile the AppleTalk-IP support as a module, choose M here: the - module will be called ipddp. 
- In this case, you will be able to use both encapsulation and - decapsulation simultaneously, by loading two copies of the module - and specifying different values for the module option ipddp_mode. - -config IPDDP_ENCAP - bool "IP to Appletalk-IP Encapsulation support" - depends on IPDDP - help - If you say Y here, the AppleTalk-IP code will be able to encapsulate - IP packets inside AppleTalk frames; this is useful if your Linux box - is stuck on an AppleTalk network (which hopefully contains a - decapsulator somewhere). Please see - for more information. diff --git a/drivers/net/appletalk/Makefile b/drivers/net/appletalk/Makefile deleted file mode 100644 index d8c7b23ec7ff..000000000000 --- a/drivers/net/appletalk/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for drivers/net/appletalk -# - -obj-$(CONFIG_IPDDP) += ipddp.o diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c deleted file mode 100644 index d558535390f9..000000000000 --- a/drivers/net/appletalk/ipddp.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * ipddp.c: IP to Appletalk-IP Encapsulation driver for Linux - * Appletalk-IP to IP Decapsulation driver for Linux - * - * Authors: - * - DDP-IP Encap by: Bradford W. Johnson - * - DDP-IP Decap by: Jay Schulist - * - * Derived from: - * - Almost all code already existed in net/appletalk/ddp.c I just - * moved/reorginized it into a driver file. Original IP-over-DDP code - * was done by Bradford W. Johnson - * - skeleton.c: A network driver outline for linux. - * Written 1993-94 by Donald Becker. - * - dummy.c: A dummy net driver. By Nick Holloway. - * - MacGate: A user space Daemon for Appletalk-IP Decap for - * Linux by Jay Schulist - * - * Copyright 1993 United States Government as represented by the - * Director, National Security Agency. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ipddp.h" /* Our stuff */ - -static const char version[] = KERN_INFO "ipddp.c:v0.01 8/28/97 Bradford W. Johnson \n"; - -static struct ipddp_route *ipddp_route_list; -static DEFINE_SPINLOCK(ipddp_route_lock); - -#ifdef CONFIG_IPDDP_ENCAP -static int ipddp_mode = IPDDP_ENCAP; -#else -static int ipddp_mode = IPDDP_DECAP; -#endif - -/* Index to functions, as function prototypes. */ -static netdev_tx_t ipddp_xmit(struct sk_buff *skb, - struct net_device *dev); -static int ipddp_create(struct ipddp_route *new_rt); -static int ipddp_delete(struct ipddp_route *rt); -static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt); -static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, - void __user *data, int cmd); - -static const struct net_device_ops ipddp_netdev_ops = { - .ndo_start_xmit = ipddp_xmit, - .ndo_siocdevprivate = ipddp_siocdevprivate, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static struct net_device * __init ipddp_init(void) -{ - static unsigned version_printed; - struct net_device *dev; - int err; - - dev = alloc_etherdev(0); - if (!dev) - return ERR_PTR(-ENOMEM); - - netif_keep_dst(dev); - strcpy(dev->name, "ipddp%d"); - - if (version_printed++ == 0) - printk(version); - - /* Initialize the device structure. 
*/ - dev->netdev_ops = &ipddp_netdev_ops; - - dev->type = ARPHRD_IPDDP; /* IP over DDP tunnel */ - dev->mtu = 585; - dev->flags |= IFF_NOARP; - - /* - * The worst case header we will need is currently a - * ethernet header (14 bytes) and a ddp header (sizeof ddpehdr+1) - * We send over SNAP so that takes another 8 bytes. - */ - dev->hard_header_len = 14+8+sizeof(struct ddpehdr)+1; - - err = register_netdev(dev); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } - - /* Let the user now what mode we are in */ - if(ipddp_mode == IPDDP_ENCAP) - printk("%s: Appletalk-IP Encap. mode by Bradford W. Johnson \n", - dev->name); - if(ipddp_mode == IPDDP_DECAP) - printk("%s: Appletalk-IP Decap. mode by Jay Schulist \n", - dev->name); - - return dev; -} - - -/* - * Transmit LLAP/ELAP frame using aarp_send_ddp. - */ -static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct rtable *rtable = skb_rtable(skb); - __be32 paddr = 0; - struct ddpehdr *ddp; - struct ipddp_route *rt; - struct atalk_addr *our_addr; - - if (rtable->rt_gw_family == AF_INET) - paddr = rtable->rt_gw4; - - spin_lock(&ipddp_route_lock); - - /* - * Find appropriate route to use, based only on IP number. - */ - for(rt = ipddp_route_list; rt != NULL; rt = rt->next) - { - if(rt->ip == paddr) - break; - } - if(rt == NULL) { - spin_unlock(&ipddp_route_lock); - return NETDEV_TX_OK; - } - - our_addr = atalk_find_dev_addr(rt->dev); - - if(ipddp_mode == IPDDP_DECAP) - /* - * Pull off the excess room that should not be there. - * This is due to a hard-header problem. This is the - * quick fix for now though, till it breaks. - */ - skb_pull(skb, 35-(sizeof(struct ddpehdr)+1)); - - /* Create the Extended DDP header */ - ddp = (struct ddpehdr *)skb->data; - ddp->deh_len_hops = htons(skb->len + (1<<10)); - ddp->deh_sum = 0; - - /* - * For Localtalk we need aarp_send_ddp to strip the - * long DDP header and place a shot DDP header on it. - */ - if(rt->dev->type == ARPHRD_LOCALTLK) - { - ddp->deh_dnet = 0; /* FIXME more hops?? */ - ddp->deh_snet = 0; - } - else - { - ddp->deh_dnet = rt->at.s_net; /* FIXME more hops?? */ - ddp->deh_snet = our_addr->s_net; - } - ddp->deh_dnode = rt->at.s_node; - ddp->deh_snode = our_addr->s_node; - ddp->deh_dport = 72; - ddp->deh_sport = 72; - - *((__u8 *)(ddp+1)) = 22; /* ddp type = IP */ - - skb->protocol = htons(ETH_P_ATALK); /* Protocol has changed */ - - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - - aarp_send_ddp(rt->dev, skb, &rt->at, NULL); - - spin_unlock(&ipddp_route_lock); - - return NETDEV_TX_OK; -} - -/* - * Create a routing entry. We first verify that the - * record does not already exist. If it does we return -EEXIST - */ -static int ipddp_create(struct ipddp_route *new_rt) -{ - struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL); - - if (rt == NULL) - return -ENOMEM; - - rt->ip = new_rt->ip; - rt->at = new_rt->at; - rt->next = NULL; - if ((rt->dev = atrtr_get_dev(&rt->at)) == NULL) { - kfree(rt); - return -ENETUNREACH; - } - - spin_lock_bh(&ipddp_route_lock); - if (__ipddp_find_route(rt)) { - spin_unlock_bh(&ipddp_route_lock); - kfree(rt); - return -EEXIST; - } - - rt->next = ipddp_route_list; - ipddp_route_list = rt; - - spin_unlock_bh(&ipddp_route_lock); - - return 0; -} - -/* - * Delete a route, we only delete a FULL match. - * If route does not exist we return -ENOENT. 
- */ -static int ipddp_delete(struct ipddp_route *rt) -{ - struct ipddp_route **r = &ipddp_route_list; - struct ipddp_route *tmp; - - spin_lock_bh(&ipddp_route_lock); - while((tmp = *r) != NULL) - { - if(tmp->ip == rt->ip && - tmp->at.s_net == rt->at.s_net && - tmp->at.s_node == rt->at.s_node) - { - *r = tmp->next; - spin_unlock_bh(&ipddp_route_lock); - kfree(tmp); - return 0; - } - r = &tmp->next; - } - - spin_unlock_bh(&ipddp_route_lock); - return -ENOENT; -} - -/* - * Find a routing entry, we only return a FULL match - */ -static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt) -{ - struct ipddp_route *f; - - for(f = ipddp_route_list; f != NULL; f = f->next) - { - if(f->ip == rt->ip && - f->at.s_net == rt->at.s_net && - f->at.s_node == rt->at.s_node) - return f; - } - - return NULL; -} - -static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, - void __user *data, int cmd) -{ - struct ipddp_route rcp, rcp2, *rp; - - if (in_compat_syscall()) - return -EOPNOTSUPP; - - if(!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&rcp, data, sizeof(rcp))) - return -EFAULT; - - switch(cmd) - { - case SIOCADDIPDDPRT: - return ipddp_create(&rcp); - - case SIOCFINDIPDDPRT: - spin_lock_bh(&ipddp_route_lock); - rp = __ipddp_find_route(&rcp); - if (rp) { - memset(&rcp2, 0, sizeof(rcp2)); - rcp2.ip = rp->ip; - rcp2.at = rp->at; - rcp2.flags = rp->flags; - } - spin_unlock_bh(&ipddp_route_lock); - - if (rp) { - if (copy_to_user(data, &rcp2, - sizeof(struct ipddp_route))) - return -EFAULT; - return 0; - } else - return -ENOENT; - - case SIOCDELIPDDPRT: - return ipddp_delete(&rcp); - - default: - return -EINVAL; - } -} - -static struct net_device *dev_ipddp; - -MODULE_LICENSE("GPL"); -module_param(ipddp_mode, int, 0); - -static int __init ipddp_init_module(void) -{ - dev_ipddp = ipddp_init(); - return PTR_ERR_OR_ZERO(dev_ipddp); -} - -static void __exit ipddp_cleanup_module(void) -{ - struct ipddp_route *p; - - unregister_netdev(dev_ipddp); - free_netdev(dev_ipddp); - - while (ipddp_route_list) { - p = ipddp_route_list->next; - kfree(ipddp_route_list); - ipddp_route_list = p; - } -} - -module_init(ipddp_init_module); -module_exit(ipddp_cleanup_module); diff --git a/drivers/net/appletalk/ipddp.h b/drivers/net/appletalk/ipddp.h deleted file mode 100644 index 9a8e45a46925..000000000000 --- a/drivers/net/appletalk/ipddp.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ipddp.h: Header for IP-over-DDP driver for Linux. 
- */ - -#ifndef __LINUX_IPDDP_H -#define __LINUX_IPDDP_H - -#ifdef __KERNEL__ - -#define SIOCADDIPDDPRT (SIOCDEVPRIVATE) -#define SIOCDELIPDDPRT (SIOCDEVPRIVATE+1) -#define SIOCFINDIPDDPRT (SIOCDEVPRIVATE+2) - -struct ipddp_route -{ - struct net_device *dev; /* Carrier device */ - __be32 ip; /* IP address */ - struct atalk_addr at; /* Gateway appletalk address */ - int flags; - struct ipddp_route *next; -}; - -#define IPDDP_ENCAP 1 -#define IPDDP_DECAP 2 - -#endif /* __KERNEL__ */ -#endif /* __LINUX_IPDDP_H */ diff --git a/net/Kconfig b/net/Kconfig index d532ec33f1fe..e248236c29a7 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -246,7 +246,7 @@ source "net/bridge/Kconfig" source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/llc/Kconfig" -source "drivers/net/appletalk/Kconfig" +source "net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" source "net/phonet/Kconfig" diff --git a/net/appletalk/Kconfig b/net/appletalk/Kconfig new file mode 100644 index 000000000000..041141abf925 --- /dev/null +++ b/net/appletalk/Kconfig @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Appletalk configuration +# +config ATALK + tristate "Appletalk protocol support" + select LLC + help + AppleTalk is the protocol that Apple computers can use to communicate + on a network. If your Linux box is connected to such a network and you + wish to connect to it, say Y. You will need to use the netatalk package + so that your Linux box can act as a print and file server for Macs as + well as access AppleTalk printers. Check out + on the WWW for details. + EtherTalk is the name used for AppleTalk over Ethernet and the + cheaper and slower LocalTalk is AppleTalk over a proprietary Apple + network using serial links. EtherTalk and LocalTalk are fully + supported by Linux. + + General information about how to connect Linux, Windows machines and + Macs is on the WWW at . The + NET3-4-HOWTO, available from + , contains valuable + information as well. + + To compile this driver as a module, choose M here: the module will be + called appletalk. You almost certainly want to compile it as a + module so you can restart your AppleTalk stack without rebooting + your machine. I hear that the GNU boycott of Apple is over, so + even politically correct people are allowed to say Y here. -- cgit v1.2.3 From 63b9f7a19ff154778cef85cf9a28f31c4a77e847 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Oct 2023 11:39:55 +0100 Subject: net: dsa: remove dsa_port_phylink_validate() As all drivers now provide phylink capabilities (including MAC), the if() condition in dsa_port_phylink_validate() will always be true. We will always use the generic validator, which phylink will call itself if the .validate method isn't populated. Thus, there is now no need to implement the .validate method, so this implementation can be removed. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/port.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'net') diff --git a/net/dsa/port.c b/net/dsa/port.c index 5f01bd4f9dec..6e0d000a97c4 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1554,20 +1554,6 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) return phydev; } -static void dsa_port_phylink_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) -{ - /* Skip call for drivers which don't yet set mac_capabilities, - * since validating in that case would mean their PHY will advertise - * nothing. In turn, skipping validation makes them advertise - * everything that the PHY supports, so those drivers should be - * converted ASAP. - */ - if (config->mac_capabilities) - phylink_generic_validate(config, supported, state); -} - static struct phylink_pcs * dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) @@ -1666,7 +1652,6 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { - .validate = dsa_port_phylink_validate, .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, -- cgit v1.2.3 From 5247dbf16cee4e83eb89e4d3b87bd5e79c5d1655 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Mon, 9 Oct 2023 19:16:33 +0800 Subject: net/core: Introduce netdev_core_stats_inc() Although there is a kfree_skb_reason() helper function that can be used to find the reason why this skb is dropped, but most callers didn't increase one of rx_dropped, tx_dropped, rx_nohandler and rx_otherhost_dropped. For the users, people are more concerned about why the dropped in ip is increasing. Introduce netdev_core_stats_inc() for trace the caller of dev_core_stats_*_inc(). Also, add __code to netdev_core_stats_alloc(), as it's called with small probability. And add noinline make sure netdev_core_stats_inc was never inlined. Signed-off-by: Yajun Deng Suggested-by: Alexander Lobakin Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 21 ++++----------------- net/core/dev.c | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e070a4540fba..11d704bfec9b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4002,32 +4002,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 606a366cc209..02949a929e7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10497,7 +10497,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) +static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc( + struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10510,7 +10511,23 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device /* This READ_ONCE() pairs with the cmpxchg() above */ return READ_ONCE(dev->core_stats); } -EXPORT_SYMBOL(netdev_core_stats_alloc); + +noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + unsigned long __percpu *field; + + if (unlikely(!p)) { + p = netdev_core_stats_alloc(dev); + if (!p) + return; + } + + field = (__force unsigned long __percpu *)((__force void *)p + offset); + this_cpu_inc(*field); +} +EXPORT_SYMBOL_GPL(netdev_core_stats_inc); /** * dev_get_stats - get network device statistics -- cgit v1.2.3 From b3098d32ed6e6f4c03a95f14426143f1b0af620f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Oct 2023 10:41:51 -0400 Subject: net: add skb_segment kunit test Add unit testing for skb segment. This function is exercised by many different code paths, such as GSO_PARTIAL or GSO_BY_FRAGS, linear (with or without head_frag), frags or frag_list skbs, etc. It is infeasible to manually run tests that cover all code paths when making changes. The long and complex function also makes it hard to establish through analysis alone that a patch has no unintended side-effects. Add code coverage through kunit regression testing. Introduce kunit infrastructure for tests under net/core, and add this first test. 
This first skb_segment test exercises a simple case: a linear skb. Follow-on patches will parametrize the test and add more variants. Tested: Built and ran the test with make ARCH=um mrproper ./tools/testing/kunit/kunit.py run \ --kconfig_add CONFIG_NET=y \ --kconfig_add CONFIG_DEBUG_KERNEL=y \ --kconfig_add CONFIG_DEBUG_INFO=y \ --kconfig_add=CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y \ net_core_gso Signed-off-by: Willem de Bruijn Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/Kconfig | 9 ++++++ net/core/Makefile | 1 + net/core/gso_test.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 net/core/gso_test.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index e248236c29a7..3ec6bc98fa05 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -508,4 +508,13 @@ config NETDEV_ADDR_LIST_TEST default KUNIT_ALL_TESTS depends on KUNIT +config NET_TEST + tristate "KUnit tests for networking" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + KUnit tests covering core networking infra, such as sk_buff. + + If unsure, say N. + endif # if NET diff --git a/net/core/Makefile b/net/core/Makefile index 731db2eaa610..0cb734cbc24b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o +obj-$(CONFIG_NET_TEST) += gso_test.o diff --git a/net/core/gso_test.c b/net/core/gso_test.c new file mode 100644 index 000000000000..454874c11b90 --- /dev/null +++ b/net/core/gso_test.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +static const char hdr[] = "abcdefgh"; +static const int gso_size = 1000, last_seg_size = 1; + +/* default: create 3 segment gso packet */ +static int payload_len = (2 * gso_size) + last_seg_size; + +static void __init_skb(struct sk_buff *skb) +{ + skb_reset_mac_header(skb); + memcpy(skb_mac_header(skb), hdr, sizeof(hdr)); + + /* skb_segment expects skb->data at start of payload */ + skb_pull(skb, sizeof(hdr)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + /* proto is arbitrary, as long as not ETH_P_TEB or vlan */ + skb->protocol = htons(ETH_P_ATALK); + skb_shinfo(skb)->gso_size = gso_size; +} + +static void gso_test_func(struct kunit *test) +{ + const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct sk_buff *skb, *segs, *cur; + struct page *page; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + skb = build_skb(page_address(page), sizeof(hdr) + payload_len + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, skb); + __skb_put(skb, sizeof(hdr) + payload_len); + + __init_skb(skb); + + segs = skb_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + if (IS_ERR(segs)) { + KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs)); + goto free_gso_skb; + } else if (!segs) { + KUNIT_FAIL(test, "no segments"); + goto free_gso_skb; + } + + for (cur = segs; cur; cur = cur->next) { + /* segs have skb->data pointing to the mac header */ + KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data); + KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr)); + + /* header was copied to all segs */ + KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0); + + /* all segs are gso_size, except for last */ + if (cur->next) { + KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + gso_size); + } else { + 
KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + last_seg_size); + + /* last seg can be found through segs->prev pointer */ + KUNIT_ASSERT_PTR_EQ(test, cur, segs->prev); + } + } + + consume_skb(segs); +free_gso_skb: + consume_skb(skb); +} + +static struct kunit_case gso_test_cases[] = { + KUNIT_CASE(gso_test_func), + {} +}; + +static struct kunit_suite gso_test_suite = { + .name = "net_core_gso", + .test_cases = gso_test_cases, +}; + +kunit_test_suite(gso_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for segmentation offload"); -- cgit v1.2.3 From 1b4fa28a8b07eb331aeb7fbfc806c0d2e3dc3627 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Oct 2023 10:41:52 -0400 Subject: net: parametrize skb_segment unit test to expand coverage Expand the test with variants - GSO_TEST_NO_GSO: payload size less than or equal to gso_size - GSO_TEST_FRAGS: payload in both linear and page frags - GSO_TEST_FRAGS_PURE: payload exclusively in page frags - GSO_TEST_GSO_PARTIAL: produce one gso segment of multiple of gso_size, plus optionally one non-gso trailer segment Define a test struct that encodes the input gso skb and output segs. Input in terms of linear and fragment lengths. Output as length of each segment. Signed-off-by: Willem de Bruijn Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/gso_test.c | 129 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 112 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/gso_test.c b/net/core/gso_test.c index 454874c11b90..c4e0b0832dba 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -4,10 +4,7 @@ #include static const char hdr[] = "abcdefgh"; -static const int gso_size = 1000, last_seg_size = 1; - -/* default: create 3 segment gso packet */ -static int payload_len = (2 * gso_size) + last_seg_size; +static const int gso_size = 1000; static void __init_skb(struct sk_buff *skb) { @@ -24,21 +21,121 @@ static void __init_skb(struct sk_buff *skb) skb_shinfo(skb)->gso_size = gso_size; } +enum gso_test_nr { + GSO_TEST_LINEAR, + GSO_TEST_NO_GSO, + GSO_TEST_FRAGS, + GSO_TEST_FRAGS_PURE, + GSO_TEST_GSO_PARTIAL, +}; + +struct gso_test_case { + enum gso_test_nr id; + const char *name; + + /* input */ + unsigned int linear_len; + unsigned int nr_frags; + const unsigned int *frags; + + /* output as expected */ + unsigned int nr_segs; + const unsigned int *segs; +}; + +static struct gso_test_case cases[] = { + { + .id = GSO_TEST_NO_GSO, + .name = "no_gso", + .linear_len = gso_size, + .nr_segs = 1, + .segs = (const unsigned int[]) { gso_size }, + }, + { + .id = GSO_TEST_LINEAR, + .name = "linear", + .linear_len = gso_size + gso_size + 1, + .nr_segs = 3, + .segs = (const unsigned int[]) { gso_size, gso_size, 1 }, + }, + { + .id = GSO_TEST_FRAGS, + .name = "frags", + .linear_len = gso_size, + .nr_frags = 2, + .frags = (const unsigned int[]) { gso_size, 1 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { gso_size, gso_size, 1 }, + }, + { + .id = GSO_TEST_FRAGS_PURE, + .name = "frags_pure", + .nr_frags = 3, + .frags = (const unsigned int[]) { gso_size, gso_size, 2 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { gso_size, gso_size, 2 }, + }, + { + .id = GSO_TEST_GSO_PARTIAL, + .name = "gso_partial", + .linear_len = gso_size, + .nr_frags = 2, + .frags = (const unsigned int[]) { gso_size, 3 }, + .nr_segs = 2, + .segs = (const unsigned int[]) { 2 * gso_size, 3 }, + }, +}; + +static void gso_test_case_to_desc(struct gso_test_case *t, char *desc) +{ + sprintf(desc, "%s", 
t->name); +} + +KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc); + static void gso_test_func(struct kunit *test) { const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + const struct gso_test_case *tcase; struct sk_buff *skb, *segs, *cur; + netdev_features_t features; struct page *page; + int i; + + tcase = test->param_value; page = alloc_page(GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, page); - skb = build_skb(page_address(page), sizeof(hdr) + payload_len + shinfo_size); + skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size); KUNIT_ASSERT_NOT_NULL(test, skb); - __skb_put(skb, sizeof(hdr) + payload_len); + __skb_put(skb, sizeof(hdr) + tcase->linear_len); __init_skb(skb); - segs = skb_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + if (tcase->nr_frags) { + unsigned int pg_off = 0; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + page_ref_add(page, tcase->nr_frags - 1); + + for (i = 0; i < tcase->nr_frags; i++) { + skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]); + pg_off += tcase->frags[i]; + } + + KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE); + + skb->data_len = pg_off; + skb->len += skb->data_len; + skb->truesize += skb->data_len; + } + + features = NETIF_F_SG | NETIF_F_HW_CSUM; + if (tcase->id == GSO_TEST_GSO_PARTIAL) + features |= NETIF_F_GSO_PARTIAL; + + segs = skb_segment(skb, features); if (IS_ERR(segs)) { KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs)); goto free_gso_skb; @@ -47,7 +144,9 @@ static void gso_test_func(struct kunit *test) goto free_gso_skb; } - for (cur = segs; cur; cur = cur->next) { + for (cur = segs, i = 0; cur; cur = cur->next, i++) { + KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]); + /* segs have skb->data pointing to the mac header */ KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data); KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr)); @@ -55,24 +154,20 @@ static void gso_test_func(struct kunit *test) /* header was copied to all segs */ KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0); - /* all segs are gso_size, except for last */ - if (cur->next) { - KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + gso_size); - } else { - KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + last_seg_size); - - /* last seg can be found through segs->prev pointer */ + /* last seg can be found through segs->prev pointer */ + if (!cur->next) KUNIT_ASSERT_PTR_EQ(test, cur, segs->prev); - } } + KUNIT_ASSERT_EQ(test, i, tcase->nr_segs); + consume_skb(segs); free_gso_skb: consume_skb(skb); } static struct kunit_case gso_test_cases[] = { - KUNIT_CASE(gso_test_func), + KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params), {} }; -- cgit v1.2.3 From 4688ecb1385f95d3a687286304710723260ad125 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Oct 2023 10:41:53 -0400 Subject: net: expand skb_segment unit test with frag_list coverage Expand the test with these variants that use skb frag_list: - GSO_TEST_FRAG_LIST: frag_skb length is gso_size - GSO_TEST_FRAG_LIST_PURE: same, data exclusively in frag skbs - GSO_TEST_FRAG_LIST_NON_UNIFORM: frag_skb length may vary - GSO_TEST_GSO_BY_FRAGS: frag_skb length defines gso_size, i.e., segs may have varying sizes. Signed-off-by: Willem de Bruijn Reviewed-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/core/gso_test.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'net') diff --git a/net/core/gso_test.c b/net/core/gso_test.c index c4e0b0832dba..c1a6cffb6f96 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -27,6 +27,10 @@ enum gso_test_nr { GSO_TEST_FRAGS, GSO_TEST_FRAGS_PURE, GSO_TEST_GSO_PARTIAL, + GSO_TEST_FRAG_LIST, + GSO_TEST_FRAG_LIST_PURE, + GSO_TEST_FRAG_LIST_NON_UNIFORM, + GSO_TEST_GSO_BY_FRAGS, }; struct gso_test_case { @@ -37,6 +41,8 @@ struct gso_test_case { unsigned int linear_len; unsigned int nr_frags; const unsigned int *frags; + unsigned int nr_frag_skbs; + const unsigned int *frag_skbs; /* output as expected */ unsigned int nr_segs; @@ -84,6 +90,48 @@ static struct gso_test_case cases[] = { .nr_segs = 2, .segs = (const unsigned int[]) { 2 * gso_size, 3 }, }, + { + /* commit 89319d3801d1: frag_list on mss boundaries */ + .id = GSO_TEST_FRAG_LIST, + .name = "frag_list", + .linear_len = gso_size, + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { gso_size, gso_size }, + .nr_segs = 3, + .segs = (const unsigned int[]) { gso_size, gso_size, gso_size }, + }, + { + .id = GSO_TEST_FRAG_LIST_PURE, + .name = "frag_list_pure", + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { gso_size, gso_size }, + .nr_segs = 2, + .segs = (const unsigned int[]) { gso_size, gso_size }, + }, + { + /* commit 43170c4e0ba7: GRO of frag_list trains */ + .id = GSO_TEST_FRAG_LIST_NON_UNIFORM, + .name = "frag_list_non_uniform", + .linear_len = gso_size, + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { gso_size, 1, gso_size, 2 }, + .nr_segs = 4, + .segs = (const unsigned int[]) { gso_size, gso_size, gso_size, 3 }, + }, + { + /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and + * commit 90017accff61 ("sctp: Add GSO support") + * + * "there will be a cover skb with protocol headers and + * children ones containing the actual segments" + */ + .id = GSO_TEST_GSO_BY_FRAGS, + .name = "gso_by_frags", + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 }, + .nr_segs = 4, + .segs = (const unsigned int[]) { 100, 200, 300, 400 }, + }, }; static void gso_test_case_to_desc(struct gso_test_case *t, char *desc) @@ -131,10 +179,54 @@ static void gso_test_func(struct kunit *test) skb->truesize += skb->data_len; } + if (tcase->frag_skbs) { + unsigned int total_size = 0, total_true_size = 0, alloc_size = 0; + struct sk_buff *frag_skb, *prev = NULL; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + page_ref_add(page, tcase->nr_frag_skbs - 1); + + for (i = 0; i < tcase->nr_frag_skbs; i++) { + unsigned int frag_size; + + frag_size = tcase->frag_skbs[i]; + frag_skb = build_skb(page_address(page) + alloc_size, + frag_size + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, frag_skb); + __skb_put(frag_skb, frag_size); + + if (prev) + prev->next = frag_skb; + else + skb_shinfo(skb)->frag_list = frag_skb; + prev = frag_skb; + + total_size += frag_size; + total_true_size += frag_skb->truesize; + alloc_size += frag_size + shinfo_size; + } + + KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE); + + skb->len += total_size; + skb->data_len += total_size; + skb->truesize += total_true_size; + + if (tcase->id == GSO_TEST_GSO_BY_FRAGS) + skb_shinfo(skb)->gso_size = GSO_BY_FRAGS; + } + features = NETIF_F_SG | NETIF_F_HW_CSUM; if (tcase->id == GSO_TEST_GSO_PARTIAL) features |= NETIF_F_GSO_PARTIAL; + /* TODO: this should also work with SG, + * rather than hit 
BUG_ON(i >= nfrags) + */ + if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM) + features &= ~NETIF_F_SG; + segs = skb_segment(skb, features); if (IS_ERR(segs)) { KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs)); -- cgit v1.2.3 From fefba7d1ae198dcbf8b3b432de46a4e29f8dbd8c Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:04 +0200 Subject: bpf: Propagate modified uaddrlen from cgroup sockaddr programs As prep for adding unix socket support to the cgroup sockaddr hooks, let's propagate the sockaddr length back to the caller after running a bpf cgroup sockaddr hook program. While not important for AF_INET or AF_INET6, the sockaddr length is important when working with AF_UNIX sockaddrs as the size of the sockaddr cannot be determined just from the address family or the sockaddr's contents. __cgroup_bpf_run_filter_sock_addr() is modified to take the uaddrlen as an input/output argument. After running the program, the modified sockaddr length is stored in the uaddrlen pointer. Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-3-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup.h | 73 +++++++++++++++++++++++----------------------- include/linux/filter.h | 1 + kernel/bpf/cgroup.c | 17 +++++++++-- net/ipv4/af_inet.c | 7 +++-- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 9 ++++-- net/ipv6/af_inet6.c | 9 +++--- net/ipv6/ping.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 6 ++-- 11 files changed, 76 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..31561e789715 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -120,6 +120,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +231,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +257,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). 
*/ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +277,29 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +478,24 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/filter.h b/include/linux/filter.h index ff7ecc89d3dd..bcd2bc15ff56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1335,6 +1335,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..ac37bd53aee0 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,6 +1450,9 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program @@ -1462,6 +1465,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,6 +1477,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). 
@@ -1483,11 +1488,19 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..7e27ad37b939 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -788,6 +788,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -800,7 +801,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -808,7 +809,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4dd809b7b188..2887177822c9 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f13eb7e23d03..7c18dd3ce011 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. 
*/ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ff984b6354..7b21a51dd25a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1865,7 +1867,8 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6337fb4504fd..c35d302a3da9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -454,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -520,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -539,7 +540,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -547,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e8fb0d275cc2..d2098dd4ceae 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94afb8d0f2d0..3a1e76a2d33e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int 
tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5e9312eefed0..622b10a549f7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,7 +410,8 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1157,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1510,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; -- cgit v1.2.3 From 53e380d21441909b12b6e0782b77187ae4b971c4 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:05 +0200 Subject: bpf: Add bpf_sock_addr_set_sun_path() to allow writing unix sockaddr from bpf As prep for adding unix socket support to the cgroup sockaddr hooks, let's add a kfunc bpf_sock_addr_set_sun_path() that allows modifying a unix sockaddr from bpf. While this is already possible for AF_INET and AF_INET6, we'll need this kfunc when we add unix socket support since modifying the address for those requires modifying both the address and the sockaddr length. Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-4-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/btf.c | 1 + net/core/filter.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 69101200c124..15d71d2986d3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7850,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/net/core/filter.c b/net/core/filter.c index 3880bf0b740d..ff0bd9f20b95 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include #include #include +#include static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -11768,6 +11769,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, return 0; } + +__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const u8 *sun_path, u32 sun_path__sz) +{ + struct sockaddr_un *un; + + if (sa_kern->sk->sk_family != AF_UNIX) + return -EINVAL; + + /* We do not allow changing the address to unnamed or larger than the + * maximum allowed address size for a unix sockaddr. 
+ */ + if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) + return -EINVAL; + + un = (struct sockaddr_un *)sa_kern->uaddr; + memcpy(un->sun_path, sun_path, sun_path__sz); + sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11792,6 +11814,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) +BTF_SET8_END(bpf_kfunc_check_set_sock_addr) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11802,6 +11828,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .set = &bpf_kfunc_check_set_xdp, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_addr, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11816,7 +11847,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); } late_initcall(bpf_kfunc_init); -- cgit v1.2.3 From 859051dd165ec6cc915f0f2114699021144fd249 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:06 +0200 Subject: bpf: Implement cgroup sockaddr hooks for unix sockets These hooks allow intercepting connect(), getsockname(), getpeername(), sendmsg() and recvmsg() for unix sockets. The unix socket hooks get write access to the address length because the address length is not fixed when dealing with unix sockets and needs to be modified when a unix socket address is modified by the hook. Because abstract socket unix addresses start with a NUL byte, we cannot recalculate the socket address in kernelspace after running the hook by calculating the length of the unix socket path using strlen(). These hooks can be used when users want to multiplex syscalls to a single unix socket across multiple different processes behind the scenes by redirecting the connect() and other syscalls to process-specific sockets. We do not implement support for intercepting bind() because when using bind() with unix sockets with a pathname address, this creates an inode in the filesystem which must be cleaned up. If we rewrite the address, the user might try to clean up the wrong file, leaking the socket in the filesystem where it is never cleaned up. Until we figure out a solution for this (and a use case for intercepting bind()), we opt to not allow rewriting the sockaddr in bind() calls. We also implement recvmsg() support for connected streams so that after a connect() that is modified by a sockaddr hook, any corresponding recvmsg() on the connected socket can also be modified to make the connected program think it is connected to the "intended" remote.
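As an illustration of how these hooks compose with the bpf_sock_addr_set_sun_path() kfunc added earlier in the series, a minimal BPF program sketch follows. It is not part of the patch: the SEC("cgroup/connect_unix") section name, the __ksym declarations, the bpf_cast_to_kern_ctx() usage and the backend address all follow common libbpf/selftest conventions and are assumptions here, not something these patches define.

/* sketch.bpf.c -- illustrative only; assumes libbpf conventions */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* abstract unix address: leading NUL byte, so strlen() cannot recover it */
__u8 backend[] = "\0example_backend_socket";

/* kfunc added by this series, plus the generic ctx-cast kfunc */
int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
			       const __u8 *sun_path, __u32 sun_path__sz) __ksym;
void *bpf_cast_to_kern_ctx(void *obj) __ksym;

SEC("cgroup/connect_unix")
int rewrite_connect_unix(struct bpf_sock_addr *ctx)
{
	/* kfuncs operate on the kernel-side context, not the UAPI view */
	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);

	/* sizeof() - 1 drops the string literal's trailing NUL but keeps the
	 * leading NUL that marks an abstract address; the kfunc updates both
	 * sun_path and sa_kern->uaddrlen
	 */
	if (bpf_sock_addr_set_sun_path(sa_kern, backend, sizeof(backend) - 1))
		return 0;	/* failure: reject the connect() */

	return 1;		/* allow the (possibly rewritten) connect() */
}

char _license[] SEC("license") = "GPL";

Such a program would be attached with the new BPF_CGROUP_UNIX_CONNECT attach type (for example via bpf_prog_attach() or a cgroup bpf_link), and a matching recvmsg-side program could rewrite the peer address back so the client still sees the address it originally asked for.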
Reviewed-by: Kuniyuki Iwashima Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-5-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup-defs.h | 5 +++++ include/linux/bpf-cgroup.h | 17 +++++++++++++++++ include/uapi/linux/bpf.h | 13 +++++++++---- kernel/bpf/cgroup.c | 11 +++++++++-- kernel/bpf/syscall.c | 15 +++++++++++++++ kernel/bpf/verifier.c | 5 ++++- net/core/filter.c | 14 ++++++++++++-- net/unix/af_unix.c | 35 ++++++++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 13 +++++++++---- 9 files changed, 114 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 31561e789715..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -289,18 +294,27 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) + #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, 
CGROUP_UDP4_RECVMSG, NULL) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) + /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by * sk_to_full_sk(). @@ -492,10 +506,13 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e0aa457f94a9..7ba61b75bc0e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ac37bd53aee0..74ad2215e1ba 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1458,7 +1458,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). * - * socket is expected to be of type INET or INET6. + * socket is expected to be of type INET, INET6 or UNIX. * * This function will return %-EPERM if an attached program is found and * returned value != 1 during execution. 
In all other cases, 0 is returned. @@ -1482,7 +1482,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). */ - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 && + sk->sk_family != AF_UNIX) return 0; if (!ctx.uaddr) { @@ -2533,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_get_retval_proto; @@ -2548,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_set_retval_proto; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6b5280f14a53..8677837f3deb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2446,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return 0; default: return -EINVAL; @@ -3678,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; case BPF_CGROUP_SOCK_OPS: return BPF_PROG_TYPE_SOCK_OPS; @@ -3942,14 +3952,19 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: diff --git a/kernel/bpf/verifier.c 
b/kernel/bpf/verifier.c index eed7350e15f4..e777f50401b6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14797,10 +14797,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) range = tnum_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) diff --git a/net/core/filter.c b/net/core/filter.c index ff0bd9f20b95..cc2e4babc85f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7875,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7893,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8948,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size, if (off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. 
*/ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..e10d07c76044 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include #include #include +#include #include "scm.h" @@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2744,6 +2772,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0aa457f94a9..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. 
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by -- cgit v1.2.3 From 2f0968a030f2a5dd4897a0151c8395bf5babe5b0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2023 14:08:56 +0200 Subject: net: gso_test: fix build with gcc-12 and earlier gcc 12 errors out with: net/core/gso_test.c:58:48: error: initializer element is not constant 58 | .segs = (const unsigned int[]) { gso_size }, This version isn't old (2022), so switch to preprocessor-bsaed constant instead of 'static const int'. Cc: Willem de Bruijn Reported-by: Tasmiya Nalatwad Closes: https://lore.kernel.org/netdev/79fbe35c-4dd1-4f27-acb2-7a60794bc348@linux.vnet.ibm.com/ Fixes: 1b4fa28a8b07 ("net: parametrize skb_segment unit test to expand coverage") Signed-off-by: Florian Westphal Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231012120901.10765-1-fw@strlen.de Signed-off-by: Paolo Abeni --- net/core/gso_test.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/core/gso_test.c b/net/core/gso_test.c index c1a6cffb6f96..c4b13de6abfb 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -4,7 +4,7 @@ #include static const char hdr[] = "abcdefgh"; -static const int gso_size = 1000; +#define GSO_TEST_SIZE 1000 static void __init_skb(struct sk_buff *skb) { @@ -18,7 +18,7 @@ static void __init_skb(struct sk_buff *skb) /* proto is arbitrary, as long as not ETH_P_TEB or vlan */ skb->protocol = htons(ETH_P_ATALK); - skb_shinfo(skb)->gso_size = gso_size; + skb_shinfo(skb)->gso_size = GSO_TEST_SIZE; } enum gso_test_nr { @@ -53,70 +53,70 @@ static struct gso_test_case cases[] = { { .id = GSO_TEST_NO_GSO, .name = "no_gso", - .linear_len = gso_size, + .linear_len = GSO_TEST_SIZE, .nr_segs = 1, - .segs = (const unsigned int[]) { gso_size }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE }, }, { .id = GSO_TEST_LINEAR, .name = "linear", - .linear_len = gso_size + gso_size + 1, + .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1, .nr_segs = 3, - .segs = (const unsigned int[]) { gso_size, gso_size, 1 }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, }, { .id = GSO_TEST_FRAGS, .name = "frags", - .linear_len = gso_size, + .linear_len = GSO_TEST_SIZE, .nr_frags = 2, - .frags = (const unsigned int[]) { gso_size, 1 }, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 }, .nr_segs = 3, - .segs = (const unsigned int[]) { gso_size, gso_size, 1 }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, }, { .id = GSO_TEST_FRAGS_PURE, .name = "frags_pure", .nr_frags = 3, - .frags = (const unsigned int[]) { gso_size, gso_size, 2 }, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, .nr_segs = 3, - .segs = (const unsigned int[]) { gso_size, gso_size, 2 }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, }, { .id = GSO_TEST_GSO_PARTIAL, .name = "gso_partial", - .linear_len = gso_size, + .linear_len = GSO_TEST_SIZE, .nr_frags = 2, - .frags = (const unsigned int[]) { gso_size, 3 }, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 }, .nr_segs = 2, - .segs = (const unsigned int[]) { 2 * gso_size, 3 }, + .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 }, }, { /* commit 89319d3801d1: frag_list on mss boundaries */ .id = GSO_TEST_FRAG_LIST, .name = 
"frag_list", - .linear_len = gso_size, + .linear_len = GSO_TEST_SIZE, .nr_frag_skbs = 2, - .frag_skbs = (const unsigned int[]) { gso_size, gso_size }, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, .nr_segs = 3, - .segs = (const unsigned int[]) { gso_size, gso_size, gso_size }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE }, }, { .id = GSO_TEST_FRAG_LIST_PURE, .name = "frag_list_pure", .nr_frag_skbs = 2, - .frag_skbs = (const unsigned int[]) { gso_size, gso_size }, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, .nr_segs = 2, - .segs = (const unsigned int[]) { gso_size, gso_size }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, }, { /* commit 43170c4e0ba7: GRO of frag_list trains */ .id = GSO_TEST_FRAG_LIST_NON_UNIFORM, .name = "frag_list_non_uniform", - .linear_len = gso_size, + .linear_len = GSO_TEST_SIZE, .nr_frag_skbs = 4, - .frag_skbs = (const unsigned int[]) { gso_size, 1, gso_size, 2 }, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 }, .nr_segs = 4, - .segs = (const unsigned int[]) { gso_size, gso_size, gso_size, 3 }, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 }, }, { /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and -- cgit v1.2.3 From 38985e8c278b82e6d4d62d4acd57c761cc23ce63 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 9 Oct 2023 13:06:08 +0300 Subject: net: Handle bulk delete policy in bridge driver The merge commit 92716869375b ("Merge branch 'br-flush-filtering'") added support for FDB flushing in bridge driver. The following patches will extend VXLAN driver to support FDB flushing as well. The netlink message for bulk delete is shared between the drivers. With the existing implementation, there is no way to prevent user from flushing with attributes that are not supported per driver. For example, when VNI will be added, user will not get an error for flush FDB entries in bridge with VNI, although this attribute is not relevant for bridge. As preparation for support of FDB flush in VXLAN driver, move the policy to be handled in bridge driver, later a new policy for VXLAN will be added in VXLAN driver. Do not pass 'vid' as part of ndo_fdb_del_bulk(), as this field is relevant only for bridge. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ net/bridge/br_fdb.c | 29 ++++++++++++++++++++++++----- net/bridge/br_private.h | 3 +-- net/core/rtnetlink.c | 27 ++++++++++----------------- 4 files changed, 37 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae553f886796..1c7681263d30 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1287,9 +1287,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. 
- * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1564,10 +1562,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e69a872bfc1d..a98ad763b368 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -661,14 +661,30 @@ static int __fdb_flush_validate_ifindex(const struct net_bridge *br, return 0; } -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = { + [NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), + [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack) { - u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; - struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid }; + struct net_bridge_fdb_flush_desc desc = {}; + struct ndmsg *ndm = nlmsg_data(nlh); struct net_bridge_port *p = NULL; + struct nlattr *tb[NDA_MAX + 1]; struct net_bridge *br; + u8 ndm_flags; + int err; + + ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, + br_fdb_del_bulk_policy, extack); + if (err) + return err; if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); @@ -681,6 +697,9 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], br = p->br; } + if (tb[NDA_VLAN]) + desc.vlan_id = nla_get_u16(tb[NDA_VLAN]); + if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); return -EINVAL; @@ -703,7 +722,7 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); } if (tb[NDA_IFINDEX]) { - int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]); + int ifidx = nla_get_s32(tb[NDA_IFINDEX]); err = __fdb_flush_validate_ifindex(br, ifidx, extack); if (err) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a1f4acfa6994..cbbe35278459 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -847,8 +847,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7452a6d190c5..eef7f7788996 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ 
-4367,13 +4367,6 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { - [NDA_VLAN] = { .type = NLA_U16 }, - [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), - [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, - [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, -}; - static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4394,8 +4387,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, - fdb_del_bulk_policy, extack); + /* For bulk delete, the drivers will parse the message with + * policy. + */ + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; @@ -4418,6 +4413,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); + if (err) + return err; } if (dev->type != ARPHRD_ETHER) { @@ -4425,10 +4424,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); - if (err) - return err; - err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -4442,8 +4437,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) @@ -4464,8 +4458,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { -- cgit v1.2.3 From 3bab3ee0f95ebd2a897ac3205b4fdee50c3b5f96 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:41 +0200 Subject: tls: get salt using crypto_info_salt in tls_enc_skb I skipped this conversion in my previous series. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls_device_fallback.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 1d743f310f4f..b4a65f53d9c0 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -340,10 +340,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, switch (tls_ctx->crypto_send.info.cipher_type) { case TLS_CIPHER_AES_GCM_128: - salt = tls_ctx->crypto_send.aes_gcm_128.salt; - break; case TLS_CIPHER_AES_GCM_256: - salt = tls_ctx->crypto_send.aes_gcm_256.salt; break; default: goto free_req; @@ -356,6 +353,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, goto free_req; iv = buf; + salt = crypto_info_salt(&tls_ctx->crypto_send.info, cipher_desc); memcpy(iv, salt, cipher_desc->salt); aad = buf + cipher_desc->salt + cipher_desc->iv; dummy_buf = aad + TLS_AAD_SPACE_SIZE; -- cgit v1.2.3 From 8f1d532b4a49e196696b0aa150962d7ce96985e4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:42 +0200 Subject: tls: drop unnecessary cipher_type checks in tls offload We should never reach tls_device_reencrypt, tls_enc_record, or tls_enc_skb with a cipher_type that can't be offloaded. Replace those checks with a DEBUG_NET_WARN_ON_ONCE, and use cipher_desc instead of hard-coding offloadable cipher types. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_device.c | 8 +------- net/tls/tls_device_fallback.c | 17 +++-------------- 2 files changed, 4 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 8c94c926606a..fbd687a0c66f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -891,14 +891,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) struct strp_msg *rxm; char *orig_buf, *buf; - switch (tls_ctx->crypto_recv.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); rxm = strp_msg(tls_strp_msg(sw_ctx)); orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv, diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index b4a65f53d9c0..1d2b4d83ccab 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -62,14 +62,8 @@ static int tls_enc_record(struct aead_request *aead_req, u16 len; int rc; - switch (prot->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(prot->cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); buf_size = TLS_HEADER_SIZE + cipher_desc->iv; len = min_t(int, *in_len, buf_size); @@ -338,14 +332,9 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, if (!aead_req) return NULL; - switch (tls_ctx->crypto_send.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - goto free_req; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); + buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE + sync_size + cipher_desc->tag; buf = kmalloc(buf_len, GFP_ATOMIC); -- cgit v1.2.3 From 6d5029e54700b2427581513c533232b02ce05043 Mon Sep 17 00:00:00 2001 From: Sabrina 
Dubroca Date: Mon, 9 Oct 2023 22:50:43 +0200 Subject: tls: store rec_seq directly within cipher_context TLS_MAX_REC_SEQ_SIZE is 8B, we don't get anything by using kmalloc. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- net/tls/tls_device.c | 11 ++--------- net/tls/tls_main.c | 1 - net/tls/tls_sw.c | 13 ++----------- 4 files changed, 5 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index a2b44578dcb7..f3f22b08af26 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -194,7 +194,7 @@ enum tls_context_flags { struct cipher_context { char *iv; - char *rec_seq; + char rec_seq[TLS_MAX_REC_SEQ_SIZE]; }; union tls_crypto_context { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index fbd687a0c66f..525d7b813869 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -58,7 +58,6 @@ static void tls_device_free_ctx(struct tls_context *ctx) { if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); - kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); } @@ -1098,16 +1097,12 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); prot->rec_seq_size = cipher_desc->rec_seq; - ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!ctx->tx.rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; - goto free_rec_seq; + goto free_iv; } offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); @@ -1192,8 +1187,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -free_rec_seq: - kfree(ctx->tx.rec_seq); free_iv: kfree(ctx->tx.iv); release_netdev: diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff9239..f705d812fc36 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -344,7 +344,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, /* We need these for tls_sw_fallback handling of other packets */ if (ctx->tx_conf == TLS_SW) { - kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 270712b8d391..93d40c9a6823 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2467,7 +2467,6 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - kfree(tls_ctx->rx.rec_seq); kfree(tls_ctx->rx.iv); if (ctx->aead_recv) { @@ -2692,19 +2691,14 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) prot->rec_seq_size = cipher_desc->rec_seq; memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); - - cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!cctx->rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; - goto free_rec_seq; + goto free_iv; } } @@ -2736,9 +2730,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) free_aead: crypto_free_aead(*aead); *aead = NULL; -free_rec_seq: - kfree(cctx->rec_seq); - cctx->rec_seq = NULL; free_iv: kfree(cctx->iv); cctx->iv = NULL; -- 
cgit v1.2.3 From bee6b7b30706e7693d91cb28c8ff3cb69e094f65 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:44 +0200 Subject: tls: rename MAX_IV_SIZE to TLS_MAX_IV_SIZE It's defined in include/net/tls.h, avoid using an overly generic name. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- net/tls/tls.h | 2 +- net/tls/tls_device_fallback.c | 2 +- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index f3f22b08af26..5200ce27db91 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -61,7 +61,7 @@ struct tls_rec; #define TLS_AAD_SPACE_SIZE 13 -#define MAX_IV_SIZE 16 +#define TLS_MAX_IV_SIZE 16 #define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 #define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE diff --git a/net/tls/tls.h b/net/tls/tls.h index 28a8c0e80e3c..16830aa2d6ec 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -127,7 +127,7 @@ struct tls_rec { struct sock *sk; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[MAX_IV_SIZE]; + u8 iv_data[TLS_MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 1d2b4d83ccab..4e7228f275fa 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -54,7 +54,7 @@ static int tls_enc_record(struct aead_request *aead_req, struct scatter_walk *out, int *in_len, struct tls_prot_info *prot) { - unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE]; + unsigned char buf[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; const struct tls_cipher_desc *cipher_desc; struct scatterlist sg_in[3]; struct scatterlist sg_out[3]; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f705d812fc36..58f13660fe6b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -59,7 +59,7 @@ enum { }; #define CHECK_CIPHER_DESC(cipher,ci) \ - static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \ + static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 93d40c9a6823..5b6175f9b9a6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -60,7 +60,7 @@ struct tls_decrypt_arg { struct tls_decrypt_ctx { struct sock *sk; - u8 iv[MAX_IV_SIZE]; + u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; struct scatterlist sg[]; @@ -2319,7 +2319,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; - char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2669,7 +2669,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } /* Sanity-check the sizes for stack allocations. 
*/ - if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) { + if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) { rc = -EINVAL; goto free_priv; } -- cgit v1.2.3 From 1c1cb3110d7ed2897e65d9a352a8fb709723e057 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:45 +0200 Subject: tls: store iv directly within cipher_context TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE is 20B, we don't get much benefit in cipher_context's size and can simplify the init code a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 3 ++- net/tls/tls_device.c | 13 ++----------- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 13 ++----------- 4 files changed, 7 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 5200ce27db91..28cc40d7b945 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -62,6 +62,7 @@ struct tls_rec; #define TLS_AAD_SPACE_SIZE 13 #define TLS_MAX_IV_SIZE 16 +#define TLS_MAX_SALT_SIZE 4 #define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 #define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE @@ -193,7 +194,7 @@ enum tls_context_flags { }; struct cipher_context { - char *iv; + char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; char rec_seq[TLS_MAX_REC_SEQ_SIZE]; }; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 525d7b813869..0981496c6294 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -56,10 +56,8 @@ static struct page *dummy_page; static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) { + if (ctx->tx_conf == TLS_HW) kfree(tls_offload_ctx_tx(ctx)); - kfree(ctx->tx.iv); - } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); @@ -1088,11 +1086,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) prot->overhead_size = prot->prepend_size + prot->tag_size; prot->iv_size = cipher_desc->iv; prot->salt_size = cipher_desc->salt; - ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!ctx->tx.iv) { - rc = -ENOMEM; - goto release_netdev; - } memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); @@ -1102,7 +1095,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; - goto free_iv; + goto release_netdev; } offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); @@ -1187,8 +1180,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -free_iv: - kfree(ctx->tx.iv); release_netdev: dev_put(netdev); return rc; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 58f13660fe6b..b91524ac1009 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -60,6 +60,7 @@ enum { #define CHECK_CIPHER_DESC(cipher,ci) \ static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \ + static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE); \ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \ @@ -344,7 +345,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, /* We need these for tls_sw_fallback handling of other packets */ if (ctx->tx_conf == TLS_SW) { - kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 
5b6175f9b9a6..c3da937b8207 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2467,8 +2467,6 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - kfree(tls_ctx->rx.iv); - if (ctx->aead_recv) { __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); @@ -2682,11 +2680,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) prot->tag_size + prot->tail_size; prot->iv_size = cipher_desc->iv; prot->salt_size = cipher_desc->salt; - cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!cctx->iv) { - rc = -ENOMEM; - goto free_priv; - } + /* Note: 128 & 256 bit salt are the same size */ prot->rec_seq_size = cipher_desc->rec_seq; memcpy(cctx->iv, salt, cipher_desc->salt); @@ -2698,7 +2692,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; - goto free_iv; + goto free_priv; } } @@ -2730,9 +2724,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) free_aead: crypto_free_aead(*aead); *aead = NULL; -free_iv: - kfree(cctx->iv); - cctx->iv = NULL; free_priv: if (tx) { kfree(ctx->priv_ctx_tx); -- cgit v1.2.3 From 615580cbc99af0da2d1c7226fab43a3d5003eb97 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:46 +0200 Subject: tls: extract context alloc/initialization out of tls_set_sw_offload Simplify tls_set_sw_offload a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 86 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c3da937b8207..b5428f543d17 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2578,6 +2578,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2599,48 +2641,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - 
} - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } -- cgit v1.2.3 From a9937816edde95575fb777703b82f85b1d6cd5b1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:47 +0200 Subject: tls: move tls_prot_info initialization out of tls_set_sw_offload Simplify tls_set_sw_offload, and allow reuse for the tls_device code. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 62 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b5428f543d17..b8e89bbb4a49 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2620,6 +2620,37 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) return sw_ctx_rx; } +static int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + u16 nonce_size = cipher_desc->nonce; + + if (crypto_info->version == TLS_1_3_VERSION) { + nonce_size = 0; + prot->aad_size = TLS_HEADER_SIZE; + prot->tail_size = 1; + } else { + prot->aad_size = TLS_AAD_SPACE_SIZE; + prot->tail_size = 0; + } + + /* Sanity-check the sizes for stack allocations. 
*/ + if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) + return -EINVAL; + + prot->version = crypto_info->version; + prot->cipher_type = crypto_info->cipher_type; + prot->prepend_size = TLS_HEADER_SIZE + nonce_size; + prot->tag_size = cipher_desc->tag; + prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; + prot->iv_size = cipher_desc->iv; + prot->salt_size = cipher_desc->salt; + prot->rec_seq_size = cipher_desc->rec_seq; + + return 0; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2632,7 +2663,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct crypto_tfm *tfm; char *iv, *rec_seq, *key, *salt; const struct tls_cipher_desc *cipher_desc; - u16 nonce_size; int rc = 0; if (!ctx) { @@ -2666,39 +2696,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_priv; } - nonce_size = cipher_desc->nonce; + rc = init_prot_info(prot, crypto_info, cipher_desc); + if (rc) + goto free_priv; iv = crypto_info_iv(crypto_info, cipher_desc); key = crypto_info_key(crypto_info, cipher_desc); salt = crypto_info_salt(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - if (crypto_info->version == TLS_1_3_VERSION) { - nonce_size = 0; - prot->aad_size = TLS_HEADER_SIZE; - prot->tail_size = 1; - } else { - prot->aad_size = TLS_AAD_SPACE_SIZE; - prot->tail_size = 0; - } - - /* Sanity-check the sizes for stack allocations. */ - if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) { - rc = -EINVAL; - goto free_priv; - } - - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + nonce_size; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + - prot->tag_size + prot->tail_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - - /* Note: 128 & 256 bit salt are the same size */ - prot->rec_seq_size = cipher_desc->rec_seq; memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); -- cgit v1.2.3 From 1a074f7618e8b82a7cebf45df6e005d2284446ce Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:48 +0200 Subject: tls: also use init_prot_info in tls_set_device_offload Most values are shared. Nonce size turns out to be equal to IV size for all offloadable ciphers. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls.h | 4 ++++ net/tls/tls_device.c | 14 ++++---------- net/tls/tls_sw.c | 14 ++++++++++---- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tls/tls.h b/net/tls/tls.h index 16830aa2d6ec..756ed6cbc3df 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -142,6 +142,10 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); void tls_err_abort(struct sock *sk, int err); +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc, + int mode); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0981496c6294..3d73dd97e903 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1076,20 +1076,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto release_netdev; } + rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_HW); + if (rc) + goto release_netdev; + iv = crypto_info_iv(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + prot->tag_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); - - prot->rec_seq_size = cipher_desc->rec_seq; memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b8e89bbb4a49..0995d3d14f4b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2620,9 +2620,10 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) return sw_ctx_rx; } -static int init_prot_info(struct tls_prot_info *prot, - const struct tls_crypto_info *crypto_info, - const struct tls_cipher_desc *cipher_desc) +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc, + int mode) { u16 nonce_size = cipher_desc->nonce; @@ -2635,6 +2636,11 @@ static int init_prot_info(struct tls_prot_info *prot, prot->tail_size = 0; } + if (mode == TLS_HW) { + prot->aad_size = 0; + prot->tail_size = 0; + } + /* Sanity-check the sizes for stack allocations. */ if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) return -EINVAL; @@ -2696,7 +2702,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_priv; } - rc = init_prot_info(prot, crypto_info, cipher_desc); + rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_SW); if (rc) goto free_priv; -- cgit v1.2.3 From 0137407999879f992b9b9a7d0949168d3d010130 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:49 +0200 Subject: tls: add a helper to allocate/initialize offload_ctx_tx Simplify tls_set_device_offload a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls_device.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 3d73dd97e903..0184426251b0 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1033,6 +1033,30 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, } } +static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx) +{ + struct tls_offload_context_tx *offload_ctx; + __be64 rcd_sn; + + offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + if (!offload_ctx) + return NULL; + + INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); + INIT_LIST_HEAD(&offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + sg_init_table(offload_ctx->sg_tx_data, + ARRAY_SIZE(offload_ctx->sg_tx_data)); + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + offload_ctx->ctx = ctx; + + return offload_ctx; +} + int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -1044,7 +1068,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) struct net_device *netdev; char *iv, *rec_seq; struct sk_buff *skb; - __be64 rcd_sn; int rc; if (!ctx) @@ -1092,7 +1115,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto release_netdev; } - offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + offload_ctx = alloc_offload_ctx_tx(ctx); if (!offload_ctx) { rc = -ENOMEM; goto free_marker_record; @@ -1102,22 +1125,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (rc) goto free_offload_ctx; - /* start at rec_seq - 1 to account for the start marker record */ - memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); - offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; - start_marker_record->end_seq = tcp_sk(sk)->write_seq; start_marker_record->len = 0; start_marker_record->num_frags = 0; - - INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); - offload_ctx->ctx = ctx; - - INIT_LIST_HEAD(&offload_ctx->records_list); list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - spin_lock_init(&offload_ctx->lock); - sg_init_table(offload_ctx->sg_tx_data, - ARRAY_SIZE(offload_ctx->sg_tx_data)); clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; -- cgit v1.2.3 From b6a30ec9239a1fa1a622608176bb78646a539608 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:50 +0200 Subject: tls: remove tls_context argument from tls_set_sw_offload It's not really needed since we end up refetching it as tls_ctx. We can also remove the NULL check, since we have already dereferenced ctx in do_tls_setsockopt_conf. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls.h | 2 +- net/tls/tls_device.c | 2 +- net/tls/tls_main.c | 4 ++-- net/tls/tls_sw.c | 18 ++++++++---------- 4 files changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tls/tls.h b/net/tls/tls.h index 756ed6cbc3df..d9e8cd73b20e 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -146,7 +146,7 @@ int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, const struct tls_cipher_desc *cipher_desc, int mode); -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +int tls_set_sw_offload(struct sock *sk, int tx); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0184426251b0..1dc217870f9d 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1233,7 +1233,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto release_ctx; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b91524ac1009..6c5e0cad89e8 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -663,7 +663,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 1); + rc = tls_set_sw_offload(sk, 1); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); @@ -677,7 +677,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0995d3d14f4b..0f6da4ce3ed7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2657,24 +2657,22 @@ int init_prot_info(struct tls_prot_info *prot, return 0; } -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) +int tls_set_sw_offload(struct sock *sk, int tx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - struct tls_crypto_info *crypto_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; + const struct tls_cipher_desc *cipher_desc; + struct tls_crypto_info *crypto_info; + char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; + struct tls_prot_info *prot; struct crypto_aead **aead; + struct tls_context *ctx; struct crypto_tfm *tfm; - char *iv, *rec_seq, *key, *salt; - const struct tls_cipher_desc *cipher_desc; int rc = 0; - if (!ctx) { - rc = -EINVAL; - goto out; - } + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (tx) { ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); -- cgit v1.2.3 From 4f4866991847738a216bb5920b3d3902cee13fd0 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:51 +0200 Subject: tls: remove tls_context argument from tls_set_device_offload It's not really needed since we end up refetching it as tls_ctx. We can also remove the NULL check, since we have already dereferenced ctx in do_tls_setsockopt_conf. 
While at it, fix up the reverse xmas tree ordering. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls.h | 4 ++-- net/tls/tls_device.c | 14 +++++++------- net/tls/tls_main.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/tls/tls.h b/net/tls/tls.h index d9e8cd73b20e..478b2c0060aa 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -227,7 +227,7 @@ static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) #ifdef CONFIG_TLS_DEVICE int tls_device_init(void); void tls_device_cleanup(void); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_device_offload(struct sock *sk); void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); @@ -238,7 +238,7 @@ static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int -tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +tls_set_device_offload(struct sock *sk) { return -EOPNOTSUPP; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1dc217870f9d..fe52765beaee 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1057,21 +1057,21 @@ static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *c return offload_ctx; } -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +int tls_set_device_offload(struct sock *sk) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - const struct tls_cipher_desc *cipher_desc; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; + const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; + struct tls_prot_info *prot; struct net_device *netdev; - char *iv, *rec_seq; + struct tls_context *ctx; struct sk_buff *skb; + char *iv, *rec_seq; int rc; - if (!ctx) - return -EINVAL; + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (ctx->priv_ctx_tx) return -EEXIST; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 6c5e0cad89e8..a342853ab6ae 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -657,7 +657,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, } if (tx) { - rc = tls_set_device_offload(sk, ctx); + rc = tls_set_device_offload(sk); conf = TLS_HW; if (!rc) { TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); -- cgit v1.2.3 From 1cf7fbcee60af932f815af5fc0ca5e7e8544ef82 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:52 +0200 Subject: tls: validate crypto_info in a separate helper Simplify do_tls_setsockopt_conf a bit. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls_main.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a342853ab6ae..b125a08a618a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -580,6 +580,31 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } +static int validate_crypto_info(const struct tls_crypto_info *crypto_info, + const struct tls_crypto_info *alt_crypto_info) +{ + if (crypto_info->version != TLS_1_2_VERSION && + crypto_info->version != TLS_1_3_VERSION) + return -EINVAL; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_ARIA_GCM_128: + case TLS_CIPHER_ARIA_GCM_256: + if (crypto_info->version != TLS_1_2_VERSION) + return -EINVAL; + break; + } + + /* Ensure that TLS version and ciphers are same in both directions */ + if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { + if (alt_crypto_info->version != crypto_info->version || + alt_crypto_info->cipher_type != crypto_info->cipher_type) + return -EINVAL; + } + + return 0; +} + static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { @@ -611,21 +636,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - /* check version */ - if (crypto_info->version != TLS_1_2_VERSION && - crypto_info->version != TLS_1_3_VERSION) { - rc = -EINVAL; + rc = validate_crypto_info(crypto_info, alt_crypto_info); + if (rc) goto err_crypto_info; - } - - /* Ensure that TLS version and ciphers are same in both directions */ - if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { - if (alt_crypto_info->version != crypto_info->version || - alt_crypto_info->cipher_type != crypto_info->cipher_type) { - rc = -EINVAL; - goto err_crypto_info; - } - } cipher_desc = get_cipher_desc(crypto_info->cipher_type); if (!cipher_desc) { @@ -633,16 +646,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_ARIA_GCM_128: - case TLS_CIPHER_ARIA_GCM_256: - if (crypto_info->version != TLS_1_2_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - break; - } - if (optlen != cipher_desc->crypto_info) { rc = -EINVAL; goto err_crypto_info; -- cgit v1.2.3 From 9f0c8245516bc30cff770c3a69a6baaf8eef8810 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 9 Oct 2023 22:50:54 +0200 Subject: tls: use fixed size for tls_offload_context_{tx,rx}.driver_state driver_state is a flex array, but is always allocated by the tls core to a fixed size (TLS_DRIVER_STATE_SIZE_{TX,RX}). Simplify the code by making that size explicit so that sizeof(struct tls_offload_context_{tx,rx}) works. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- include/net/tls.h | 14 ++++---------- net/tls/tls_device.c | 4 ++-- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 28cc40d7b945..962f0c501111 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -150,6 +150,7 @@ struct tls_record_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; +#define TLS_DRIVER_STATE_SIZE_TX 16 struct tls_offload_context_tx { struct crypto_aead *aead_send; spinlock_t lock; /* protects records list */ @@ -163,17 +164,13 @@ struct tls_offload_context_tx { void (*sk_destruct)(struct sock *sk); struct work_struct destruct_work; struct tls_context *ctx; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_TX 16 + u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) - enum tls_context_flags { /* tls_device_down was called after the netdev went down, device state * was released, and kTLS works in software, even though rx_conf is @@ -303,6 +300,7 @@ struct tls_offload_resync_async { u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; +#define TLS_DRIVER_STATE_SIZE_RX 8 struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -326,17 +324,13 @@ struct tls_offload_context_rx { struct tls_offload_resync_async *resync_async; }; }; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_RX 8 + u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index fe52765beaee..f01543557a60 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1038,7 +1038,7 @@ static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *c struct tls_offload_context_tx *offload_ctx; __be64 rcd_sn; - offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL); if (!offload_ctx) return NULL; @@ -1225,7 +1225,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_lock; } - context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { rc = -ENOMEM; goto release_lock; -- cgit v1.2.3 From cf8b49fbd0418168b55c807b7fb62d7031026183 Mon Sep 17 00:00:00 2001 From: Heng Guo Date: Wed, 11 Oct 2023 09:51:37 +0800 Subject: net: fix IPSTATS_MIB_OUTFORWDATAGRAMS increment after fragment check Reproduce environment: network with 3 VM linuxs is connected as below: VM1<---->VM2(latest kernel 6.5.0-rc7)<---->VM3 VM1: eth0 ip: 192.168.122.207 MTU 1800 VM2: eth0 ip: 192.168.122.208, eth1 ip: 192.168.123.224 MTU 1500 VM3: eth0 ip: 192.168.123.240 MTU 1800 Reproduce: VM1 send 1600 bytes UDP data to VM3 using tools scapy with flags='DF'. 
scapy command:
send(IP(dst="192.168.123.240",flags='DF')/UDP()/str('0'*1600),count=1, inter=1.000000)
Result:
Before IP data is sent.
----------------------------------------------------------------------
root@qemux86-64:~# cat /proc/net/snmp
Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqdss
Ip: 1 64 6 0 2 2 0 0 2 4 0 0 0 0 0 0 0 0 0
......
root@qemux86-64:~#
----------------------------------------------------------------------
After IP data is sent.
----------------------------------------------------------------------
root@qemux86-64:~# cat /proc/net/snmp
Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqdss
Ip: 1 64 7 0 2 2 0 0 2 5 0 0 0 0 0 0 0 1 0
......
root@qemux86-64:~#
----------------------------------------------------------------------
ForwDatagrams stays at 2 without incrementing.
Issue description and patch:
ip_exceeds_mtu() in ip_forward() drops this IP datagram because the skb len (1600, sent by scapy) is over the MTU (1500 in VM2) if "DF" is set.
According to RFC 4293 "3.2.3. IP Statistics Tables", +-------+------>------+----->-----+----->-----+ | InForwDatagrams (6) | OutForwDatagrams (6) | | V +->-+ OutFragReqds | InNoRoutes | | (packets) / (local packet (3) | | | IF is that of the address | +--> OutFragFails | and may not be the receiving IF) | | (packets) the IPSTATS_MIB_OUTFORWDATAGRAMS should be counted before the fragment check.
The existing implementation, instead, increases the counter after the fragment check: ip_exceeds_mtu() in ipv4 and ip6_pkt_too_big() in ipv6.
So this patch moves the IPSTATS_MIB_OUTFORWDATAGRAMS increment to ip_forward() for ipv4 and ip6_forward() for ipv6.
Test result with patch:
Before IP data is sent.
----------------------------------------------------------------------
root@qemux86-64:~# cat /proc/net/snmp
Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqdss
Ip: 1 64 6 0 2 2 0 0 2 4 0 0 0 0 0 0 0 0 0
......
root@qemux86-64:~#
----------------------------------------------------------------------
After IP data is sent.
----------------------------------------------------------------------
root@qemux86-64:~# cat /proc/net/snmp
Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqdss
Ip: 1 64 7 0 2 3 0 0 2 5 0 0 0 0 0 0 0 1 0
......
root@qemux86-64:~#
----------------------------------------------------------------------
ForwDatagrams is updated from 2 to 3.
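For reference, the ForwDatagrams values quoted in the dumps above come from the two consecutive "Ip:" lines of /proc/net/snmp (one line of counter names, one line of values in the same order). The following is only an illustrative userspace sketch, not part of the patch, showing how that single counter can be read programmatically for a before/after comparison:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char names[1024], values[1024];
	char *n, *v, *np, *vp;
	FILE *f = fopen("/proc/net/snmp", "r");

	if (!f)
		return 1;

	while (fgets(names, sizeof(names), f)) {
		/* The "Ip:" group is a header line followed by a value line. */
		if (strncmp(names, "Ip:", 3) || !fgets(values, sizeof(values), f))
			continue;

		n = strtok_r(names, " \n", &np);
		v = strtok_r(values, " \n", &vp);
		while (n && v) {
			if (!strcmp(n, "ForwDatagrams"))
				printf("ForwDatagrams = %s\n", v);
			n = strtok_r(NULL, " \n", &np);
			v = strtok_r(NULL, " \n", &vp);
		}
		break;
	}

	fclose(f);
	return 0;
}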
Reviewed-by: Filip Pudak Signed-off-by: Heng Guo Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231011015137.27262-1-heng.guo@windriver.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_forward.c | 4 ++-- net/ipv6/ip6_output.c | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 66fac1216d46..8b65f12583eb 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -66,8 +66,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -130,6 +128,8 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cdaa9275e990..a471c7e91761 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -446,10 +446,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -617,6 +613,8 @@ int ip6_forward(struct sk_buff *skb) } } + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; -- cgit v1.2.3 From 9c1292eca243821249fa99f40175b0660d9329e3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 13 Oct 2023 11:57:02 -0700 Subject: net/bpf: Avoid unused "sin_addr_len" warning when CONFIG_CGROUP_BPF is not set It was reported that there is a compiler warning on the unused variable "sin_addr_len" in af_inet.c when CONFIG_CGROUP_BPF is not set. This patch is to address it similar to the ipv6 counterpart in inet6_getname(). It is to "return sin_addr_len;" instead of "return sizeof(*sin);". Fixes: fefba7d1ae19 ("bpf: Propagate modified uaddrlen from cgroup sockaddr programs") Reported-by: Stephen Rothwell Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/bpf/20231013185702.3993710-1-martin.lau@linux.dev Closes: https://lore.kernel.org/bpf/20231013114007.2fb09691@canb.auug.org.au/ --- net/ipv4/af_inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7e27ad37b939..5ce275b2d7ef 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -814,7 +814,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet_getname); -- cgit v1.2.3 From 85605fb694f084ba017c93c150e668882445ce73 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 12 Oct 2023 08:34:43 +0200 Subject: appletalk: remove special handling code for ipddp After commit 1dab47139e61 ("appletalk: remove ipddp driver") removes the config IPDDP, there is some minor code clean-up possible in the appletalk network layer. 
Remove some code in appletalk layer after the ipddp driver is gone. Signed-off-by: Lukas Bulwahn Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231012063443.22368-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- net/appletalk/ddp.c | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 8978fb6212ff..9ba04a69ec2a 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1284,39 +1284,6 @@ out: return err; } -#if IS_ENABLED(CONFIG_IPDDP) -static __inline__ int is_ip_over_ddp(struct sk_buff *skb) -{ - return skb->data[12] == 22; -} - -static int handle_ip_over_ddp(struct sk_buff *skb) -{ - struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); - struct net_device_stats *stats; - - /* This needs to be able to handle ipddp"N" devices */ - if (!dev) { - kfree_skb(skb); - return NET_RX_DROP; - } - - skb->protocol = htons(ETH_P_IP); - skb_pull(skb, 13); - skb->dev = dev; - skb_reset_transport_header(skb); - - stats = netdev_priv(dev); - stats->rx_packets++; - stats->rx_bytes += skb->len + 13; - return netif_rx(skb); /* Send the SKB up to a higher place. */ -} -#else -/* make it easy for gcc to optimize this test out, i.e. kill the code */ -#define is_ip_over_ddp(skb) 0 -#define handle_ip_over_ddp(skb) 0 -#endif - static int atalk_route_packet(struct sk_buff *skb, struct net_device *dev, struct ddpehdr *ddp, __u16 len_hops, int origlen) { @@ -1480,9 +1447,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, return atalk_route_packet(skb, dev, ddp, len_hops, origlen); } - /* if IP over DDP is not selected this code will be optimized out */ - if (is_ip_over_ddp(skb)) - return handle_ip_over_ddp(skb); /* * Which socket - atalk_search_socket() looks for a *full match* * of the tuple. -- cgit v1.2.3 From 0064cfb44084ba98927d8e72340ab78e5887462b Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:13 +0300 Subject: vsock: set EPOLLERR on non-empty error queue If socket's error queue is not empty, EPOLLERR must be set. Otherwise, reader of error queue won't detect data in it using EPOLLERR bit. Currently for AF_VSOCK this is actual only with MSG_ZEROCOPY, as this feature is the only user of an error queue of the socket. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 013b65241b65..d841f4de33b0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1030,7 +1030,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); mask = 0; - if (sk->sk_err) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; -- cgit v1.2.3 From 49dbe25adac42d3e06f65d1420946bec65896222 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:14 +0300 Subject: vsock: read from socket's error queue This adds handling of MSG_ERRQUEUE input flag in receive call. This flag is used to read socket's error queue instead of data queue. Possible scenario of error queue usage is receiving completions for transmission with MSG_ZEROCOPY flag. This patch also adds new defines: 'SOL_VSOCK' and 'VSOCK_RECVERR'. 
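Since the notifications reuse the standard sock_extended_err layout described in Documentation/networking/msg_zerocopy.rst, a consumer of these completions can be sketched roughly as below. This is only an illustrative userspace snippet, not part of the patch: the helper name is made up, and it assumes uapi headers new enough to provide SOL_VSOCK and VSOCK_RECVERR.

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/errqueue.h>
#include <linux/vm_sockets.h>

/* Drain one MSG_ZEROCOPY completion notification from a vsock socket's
 * error queue; returns 0 if the queue was empty or a completion was read.
 */
static int vsock_read_zerocopy_completion(int fd)
{
	char control[CMSG_SPACE(sizeof(struct sock_extended_err))] = {};
	struct sock_extended_err *serr;
	struct msghdr msg = {};
	struct cmsghdr *cm;

	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);

	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return errno == EAGAIN ? 0 : -1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level != SOL_VSOCK || cm->cmsg_type != VSOCK_RECVERR)
			continue;

		serr = (struct sock_extended_err *)CMSG_DATA(cm);
		if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY || serr->ee_errno)
			continue;

		/* [ee_info, ee_data] is the range of completed zerocopy
		 * transmissions, counted in the order they were submitted.
		 */
		printf("zerocopy completions %u..%u%s\n",
		       serr->ee_info, serr->ee_data,
		       (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED) ?
		       " (data was copied)" : "");
	}

	return 0;
}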
Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + include/uapi/linux/vm_sockets.h | 17 +++++++++++++++++ net/vmw_vsock/af_vsock.c | 6 ++++++ 3 files changed, 24 insertions(+) (limited to 'net') diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index c60ca33eac59..ed07181d4eff 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -191,4 +191,21 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/* MSG_ZEROCOPY notifications are encoded in the standard error format, + * sock_extended_err. See Documentation/networking/msg_zerocopy.rst in + * kernel source tree for more details. + */ + +/* 'cmsg_level' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define SOL_VSOCK 287 + +/* 'cmsg_type' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define VSOCK_RECVERR 1 + #endif /* _UAPI_VM_SOCKETS_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d841f4de33b0..38486efd3d05 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ #include #include #include +#include static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); @@ -2137,6 +2139,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int err; sk = sock->sk; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR); + vsk = vsock_sk(sk); err = 0; -- cgit v1.2.3 From 5fbfc7d243343917793ae95a6011f03b5aac4735 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:15 +0300 Subject: vsock: check for MSG_ZEROCOPY support on send This feature totally depends on transport, so if transport doesn't support it, return error. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/net/af_vsock.h | 7 +++++++ net/vmw_vsock/af_vsock.c | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index b01cf9ac2437..e302c0e804d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -177,6 +177,9 @@ struct vsock_transport { /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); + + /* Zero-copy. 
*/ + bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ @@ -241,4 +244,8 @@ static inline void __init vsock_bpf_build_proto(void) {} #endif +static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) +{ + return t->msgzerocopy_allow && t->msgzerocopy_allow(); +} #endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38486efd3d05..71108b1f0dfc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1824,6 +1824,12 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } + if (msg->msg_flags & MSG_ZEROCOPY && + !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto out; + } + /* Wait for room in the produce queue to enqueue our user's data. */ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -- cgit v1.2.3 From dcc55d7bb23016e7ae335c8558e1937d7a551b35 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:16 +0300 Subject: vsock: enable SOCK_SUPPORT_ZC bit This bit is used by io_uring in case of zerocopy tx mode. io_uring code checks, that socket has this feature. This patch sets it in two places: 1) For socket in 'connect()' call. 2) For new socket which is returned by 'accept()' call. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 71108b1f0dfc..37b1c0432941 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1406,6 +1406,9 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, goto out; } + if (vsock_msgzerocopy_allow(transport)) + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1560,6 +1563,9 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); + if (vsock_msgzerocopy_allow(vconnected->transport)) + set_bit(SOCK_SUPPORT_ZC, + &connected->sk_socket->flags); } release_sock(connected); -- cgit v1.2.3 From e2fcc326b4986b6f557acb244b5be218cc10951e Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:18 +0300 Subject: vsock/virtio: support MSG_ZEROCOPY for transport Add 'msgzerocopy_allow()' callback for virtio transport. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. 
Miller --- net/vmw_vsock/virtio_transport.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 09ba3128e759..d324ae13e2f5 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -486,6 +486,11 @@ static bool virtio_transport_can_msgzerocopy(int bufs_num) return res; } +static bool virtio_transport_msgzerocopy_allow(void) +{ + return true; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -519,6 +524,8 @@ static struct virtio_transport virtio_transport = { .seqpacket_allow = virtio_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = virtio_transport_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, -- cgit v1.2.3 From cfdca3904687d851436076080779c271bc31eb20 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:19 +0300 Subject: vsock/loopback: support MSG_ZEROCOPY for transport Add 'msgzerocopy_allow()' callback for loopback transport. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/vsock_loopback.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 5c6360df1f31..048640167411 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -47,6 +47,10 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) } static bool vsock_loopback_seqpacket_allow(u32 remote_cid); +static bool vsock_loopback_msgzerocopy_allow(void) +{ + return true; +} static struct virtio_transport loopback_transport = { .transport = { @@ -79,6 +83,8 @@ static struct virtio_transport loopback_transport = { .seqpacket_allow = vsock_loopback_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = vsock_loopback_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, -- cgit v1.2.3 From e0718bd82e27d85086ada18e7f04847ee84b710a Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:20 +0300 Subject: vsock: enable setting SO_ZEROCOPY For AF_VSOCK, zerocopy tx mode depends on transport, so this option must be set in AF_VSOCK implementation where transport is accessible (if transport is not set during setting SO_ZEROCOPY: for example socket is not connected, then SO_ZEROCOPY will be enabled, but once transport will be assigned, support of this type of transmission will be checked). To handle SO_ZEROCOPY, AF_VSOCK implementation uses SOCK_CUSTOM_SOCKOPT bit, thus handling SOL_SOCKET option operations, but all of them except SO_ZEROCOPY will be forwarded to the generic handler by calling 'sock_setsockopt()'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. 
Miller --- net/vmw_vsock/af_vsock.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 37b1c0432941..816725af281f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1406,8 +1406,16 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, goto out; } - if (vsock_msgzerocopy_allow(transport)) + if (vsock_msgzerocopy_allow(transport)) { set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + } else if (sock_flag(sk, SOCK_ZEROCOPY)) { + /* If this option was set before 'connect()', + * when transport was unknown, check that this + * feature is supported here. + */ + err = -EOPNOTSUPP; + goto out; + } err = vsock_auto_bind(vsk); if (err) @@ -1643,7 +1651,7 @@ static int vsock_connectible_setsockopt(struct socket *sock, const struct vsock_transport *transport; u64 val; - if (level != AF_VSOCK) + if (level != AF_VSOCK && level != SOL_SOCKET) return -ENOPROTOOPT; #define COPY_IN(_v) \ @@ -1666,6 +1674,33 @@ static int vsock_connectible_setsockopt(struct socket *sock, transport = vsk->transport; + if (level == SOL_SOCKET) { + int zerocopy; + + if (optname != SO_ZEROCOPY) { + release_sock(sk); + return sock_setsockopt(sock, level, optname, optval, optlen); + } + + /* Use 'int' type here, because variable to + * set this option usually has this type. + */ + COPY_IN(zerocopy); + + if (zerocopy < 0 || zerocopy > 1) { + err = -EINVAL; + goto exit; + } + + if (transport && !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto exit; + } + + sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy); + goto exit; + } + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -2322,6 +2357,12 @@ static int vsock_create(struct net *net, struct socket *sock, } } + /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its + * proto_ops, so there is no handler for custom logic. + */ + if (sock_type_connectible(sock->type)) + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + vsock_insert_unbound(vsk); return 0; -- cgit v1.2.3 From bf3fcbf7e7a08015d3b169bad6281b29d45c272d Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 16 Oct 2023 09:15:20 +0200 Subject: ipv4: rename and move ip_route_output_tunnel() At the moment ip_route_output_tunnel() is used only by bareudp. Ideally, other UDP tunnel implementations should use it, but to do so the function needs to accept new parameters that are specific for UDP tunnels, such as the ports. Prepare for these changes by renaming the function to udp_tunnel_dst_lookup() and move it to file net/ipv4/udp_tunnel_core.c. Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/bareudp.c | 8 ++++---- include/net/route.h | 6 ------ include/net/udp_tunnel.h | 6 ++++++ net/ipv4/route.c | 48 ---------------------------------------------- net/ipv4/udp_tunnel_core.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 683203f87ae2..63fc32fa1af5 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -306,8 +306,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!sock) return -ESHUTDOWN; - rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info, - IPPROTO_UDP, use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, info, + IPPROTO_UDP, use_cache); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -483,8 +483,8 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct rtable *rt; __be32 saddr; - rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, - info, IPPROTO_UDP, use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, + info, IPPROTO_UDP, use_cache); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/include/net/route.h b/include/net/route.h index 5c248a8e3d0e..980ab474eabd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); - struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 21ba0a25f936..11e810ca5088 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -162,6 +162,12 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, void udp_tunnel_sock_release(struct socket *sock); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); + struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e2bf4602b559..3290a4442b4a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2885,54 +2885,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache) -{ -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct rtable *rt = NULL; - struct flowi4 fl4; - __u8 tos; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) - return rt; - } -#endif - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = protocol; - fl4.daddr = info->key.u.ipv4.dst; - fl4.saddr = info->key.u.ipv4.src; - tos = info->key.tos; - fl4.flowi4_tos = RT_TOS(tos); - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); - return ERR_PTR(-ENETUNREACH); - } - if 
(rt->dst.dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); -#endif - *saddr = fl4.saddr; - return rt; -} -EXPORT_SYMBOL_GPL(ip_route_output_tunnel); - /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 1e7e4aecdc48..96f93f92b6ce 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -204,4 +204,52 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache) +{ +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct rtable *rt = NULL; + struct flowi4 fl4; + __u8 tos; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = protocol; + fl4.daddr = info->key.u.ipv4.dst; + fl4.saddr = info->key.u.ipv4.src; + tos = info->key.tos; + fl4.flowi4_tos = RT_TOS(tos); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup); + MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 78f3655adcb52412275f282267ee771421731632 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 16 Oct 2023 09:15:21 +0200 Subject: ipv4: remove "proto" argument from udp_tunnel_dst_lookup() The function is now UDP-specific, the protocol is always IPPROTO_UDP. Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/bareudp.c | 4 ++-- include/net/udp_tunnel.h | 2 +- net/ipv4/udp_tunnel_core.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 63fc32fa1af5..6af67cac6bde 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -307,7 +307,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -ESHUTDOWN; rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, info, - IPPROTO_UDP, use_cache); + use_cache); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -484,7 +484,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, __be32 saddr; rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, - info, IPPROTO_UDP, use_cache); + info, use_cache); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 11e810ca5088..8f110dbd3784 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -166,7 +166,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, __be32 *saddr, const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); + bool use_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 96f93f92b6ce..9b0cfd72d5fd 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -208,7 +208,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, __be32 *saddr, const struct ip_tunnel_info *info, - u8 protocol, bool use_cache) + bool use_cache) { #ifdef CONFIG_DST_CACHE struct dst_cache *dst_cache; @@ -227,7 +227,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, #endif memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = protocol; + fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = info->key.u.ipv4.dst; fl4.saddr = info->key.u.ipv4.src; tos = info->key.tos; -- cgit v1.2.3 From 72fc68c6356b663a8763f02d9b0ec773d59a4949 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 16 Oct 2023 09:15:22 +0200 Subject: ipv4: add new arguments to udp_tunnel_dst_lookup() We want to make the function more generic so that it can be used by other UDP tunnel implementations such as geneve and vxlan. To do that, add the following arguments: - source and destination UDP port; - ifindex of the output interface, needed by vxlan; - the tos, because in some cases it is not taken from struct ip_tunnel_info (for example, when it's inherited from the inner packet); - the dst cache, because not all tunnel types (e.g. vxlan) want to use the one from struct ip_tunnel_info. With these parameters, the function no longer needs the full struct ip_tunnel_info as argument and we can pass only the relevant part of it (struct ip_tunnel_key). Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/bareudp.c | 11 +++++++---- include/net/udp_tunnel.h | 8 +++++--- net/ipv4/udp_tunnel_core.c | 26 +++++++++++++------------- 3 files changed, 25 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 6af67cac6bde..47a9c2a5583c 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -306,8 +306,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!sock) return -ESHUTDOWN; - rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, info, - use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, + 0, 0, key->tos, + use_cache ? + (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -483,8 +485,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct rtable *rt; __be32 saddr; - rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, &saddr, - info, use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, + &info->key, 0, 0, info->key.tos, + use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 8f110dbd3784..4d0578fab01a 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -164,9 +164,11 @@ void udp_tunnel_sock_release(struct socket *sock); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - bool use_cache); + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9b0cfd72d5fd..494685e82856 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -206,31 +206,31 @@ EXPORT_SYMBOL_GPL(udp_tun_rx_dst); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - bool use_cache) + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache) { -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif struct rtable *rt = NULL; struct flowi4 fl4; - __u8 tos; #ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { + if (dst_cache) { rt = dst_cache_get_ip4(dst_cache, saddr); if (rt) return rt; } #endif + memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = info->key.u.ipv4.dst; - fl4.saddr = info->key.u.ipv4.src; - tos = info->key.tos; + fl4.flowi4_oif = oif; + fl4.daddr = key->u.ipv4.dst; + fl4.saddr = key->u.ipv4.src; + fl4.fl4_dport = dport; + fl4.fl4_sport = sport; fl4.flowi4_tos = RT_TOS(tos); rt = ip_route_output_key(net, &fl4); @@ -244,7 +244,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, return ERR_PTR(-ELOOP); } #ifdef CONFIG_DST_CACHE - if (use_cache) + if (dst_cache) dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); #endif *saddr = fl4.saddr; -- cgit v1.2.3 From 3ae983a603a49e6c80763f747a170b7e987531f3 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Mon, 16 Oct 2023 09:15:23 +0200 Subject: ipv4: use tunnel flow flags for tunnel route lookups Commit 451ef36bd229 ("ip_tunnels: Add new 
flow flags field to ip_tunnel_key") added a new field to struct ip_tunnel_key to control route lookups. Currently the flag is used by vxlan and geneve tunnels; use it also in udp_tunnel_dst_lookup() so that it affects all tunnel types relying on this function. Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 494685e82856..a87defb2b167 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -232,6 +232,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, fl4.fl4_dport = dport; fl4.fl4_sport = sport; fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_flags = key->flow_flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { -- cgit v1.2.3 From 54a59aed395ce0f4177b5212e5746a6462de3ad9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Oct 2023 11:26:54 +0200 Subject: net, sched: Make tc-related drop reason more flexible Currently, the kfree_skb_reason() in sch_handle_{ingress,egress}() can only express a basic SKB_DROP_REASON_TC_INGRESS or SKB_DROP_REASON_TC_EGRESS reason. Victor kicked-off an initial proposal to make this more flexible by disambiguating verdict from return code by moving the verdict into struct tcf_result and letting tcf_classify() return a negative error. If hit, then two new drop reasons were added in the proposal, that is SKB_DROP_REASON_TC_INGRESS_ERROR as well as SKB_DROP_REASON_TC_EGRESS_ERROR. Further analysis of the actual error codes would have required to attach to tcf_classify via kprobe/kretprobe to more deeply debug skb and the returned error. In order to make the kfree_skb_reason() in sch_handle_{ingress,egress}() more extensible, it can be addressed in a more straight forward way, that is: Instead of placing the verdict into struct tcf_result, we can just put the drop reason in there, which does not require changes throughout various classful schedulers given the existing verdict logic can stay as is. Then, SKB_DROP_REASON_TC_ERROR{,_*} can be added to the enum skb_drop_reason to disambiguate between an error or an intentional drop. New drop reason error codes can be added successively to the tc code base. For internal error locations which have not yet been annotated with a SKB_DROP_REASON_TC_ERROR{,_*}, the fallback is SKB_DROP_REASON_TC_INGRESS and SKB_DROP_REASON_TC_EGRESS, respectively. Generic errors could be marked with a SKB_DROP_REASON_TC_ERROR code until they are converted to more specific ones if it is found that they would be useful for troubleshooting. While drop reasons have infrastructure for subsystem specific error codes which are currently used by mac80211 and ovs, Jakub mentioned that it is preferred for tc to use the enum skb_drop_reason core codes given it is a better fit and currently the tooling support is better, too. With regards to the latter: [...] I think Alastair (bpftrace) is working on auto-prettifying enums when bpftrace outputs maps. So we can do something like: $ bpftrace -e 'tracepoint:skb:kfree_skb { @[args->reason] = count(); }' Attaching 1 probe... ^C @[SKB_DROP_REASON_TC_INGRESS]: 2 @[SKB_CONSUMED]: 34 ^^^^^^^^^^^^ names!! Auto-magically. [...] Add a small helper tcf_set_drop_reason() which can be used to set the drop reason into the tcf_result. 
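For illustration only, here is a minimal sketch of the kind of tc-internal error site the helper is meant for. tcf_set_drop_reason(), struct tcf_result and TC_ACT_SHOT/TC_ACT_UNSPEC come from this patch and existing headers; SKB_DROP_REASON_TC_ERROR is only introduced by the follow-up patch, and example_classify_step()/example_check() are made-up placeholders, not real kernel functions.

#include <linux/pkt_cls.h>
#include <net/pkt_cls.h>

static bool example_check(const struct sk_buff *skb); /* hypothetical predicate */

/* Record why the packet is shot so that sch_handle_{ingress,egress}()
 * can hand the precise reason to kfree_skb_reason() instead of the
 * generic SKB_DROP_REASON_TC_{IN,E}GRESS fallback.
 */
static int example_classify_step(struct sk_buff *skb, struct tcf_result *res)
{
        if (!example_check(skb)) {
                tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
                return TC_ACT_SHOT;
        }

        return TC_ACT_UNSPEC;
}
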
Signed-off-by: Daniel Borkmann Cc: Jamal Hadi Salim Cc: Victor Nogueira Link: https://lore.kernel.org/netdev/20231006063233.74345d36@kernel.org Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231009092655.22025-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/pkt_cls.h | 6 ++++++ include/net/sch_generic.h | 3 +-- net/core/dev.c | 15 ++++++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f308e8268651..a76c9171db0e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } +static inline void tcf_set_drop_reason(struct tcf_result *res, + enum skb_drop_reason reason) +{ + res->drop_reason = reason; +} + static inline void __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c7318c73cfd6..dcb9160e6467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -324,7 +324,6 @@ struct Qdisc_ops { struct module *owner; }; - struct tcf_result { union { struct { @@ -332,8 +331,8 @@ struct tcf_result { u32 classid; }; const struct tcf_proto *goto_tp; - }; + enum skb_drop_reason drop_reason; }; struct tcf_chain; diff --git a/net/core/dev.c b/net/core/dev.c index 3ca746a5f0ad..97e7b9833db9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3914,7 +3914,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, + enum skb_drop_reason *drop_reason) { int ret = TC_ACT_UNSPEC; #ifdef CONFIG_NET_CLS_ACT @@ -3926,12 +3927,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; + res.drop_reason = *drop_reason; mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. 
*/ switch (ret) { case TC_ACT_SHOT: + *drop_reason = res.drop_reason; mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -3981,6 +3984,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; int sch_ret; if (!entry) @@ -3998,7 +4002,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); ingress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4015,7 +4019,7 @@ ingress_verdict: *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; return NULL; /* used by tc_run */ @@ -4036,6 +4040,7 @@ static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; int sch_ret; if (!entry) @@ -4049,7 +4054,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); egress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4058,7 +4063,7 @@ egress_verdict: *ret = NET_XMIT_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; return NULL; /* used by tc_run */ -- cgit v1.2.3 From 39d08b91646d83e87f7cbcd846b3ef33b1a53b79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Oct 2023 11:26:55 +0200 Subject: net, sched: Add tcf_set_drop_reason for {__,}tcf_classify Add an initial user for the newly added tcf_set_drop_reason() helper to set the drop reason for internal errors leading to TC_ACT_SHOT inside {__,}tcf_classify(). Right now this only adds a very basic SKB_DROP_REASON_TC_ERROR as a generic fallback indicator to mark drop locations. Where needed, such locations can be converted to more specific codes, for example, when hitting the reclassification limit, etc. Signed-off-by: Daniel Borkmann Cc: Jamal Hadi Salim Cc: Victor Nogueira Link: https://lore.kernel.org/r/20231009092655.22025-2-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 3 +++ net/sched/cls_api.c | 26 ++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a587e83fc169..845dce805de7 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -80,6 +80,7 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ + FN(TC_ERROR) \ FNe(MAX) /** @@ -345,6 +346,8 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, + /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. 
*/ + SKB_DROP_REASON_TC_ERROR, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a193cc7b3241..1daeb2182b70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1681,12 +1681,16 @@ reclassify: * time we got here with a cookie from hardware. */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || - !tp->ops->get_exts)) + !tp->ops->get_exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } exts = tp->ops->get_exts(tp, n->handle); - if (unlikely(!exts || n->exts != exts)) + if (unlikely(!exts || n->exts != exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); @@ -1712,8 +1716,10 @@ reclassify: return err; } - if (unlikely(n)) + if (unlikely(n)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT @@ -1723,6 +1729,7 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } @@ -1759,8 +1766,10 @@ int tcf_classify(struct sk_buff *skb, if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); - if (!n) + if (!n) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } chain = n->chain_index; } else { @@ -1768,8 +1777,10 @@ int tcf_classify(struct sk_buff *skb, } fchain = tcf_chain_lookup_rcu(block, chain); - if (!fchain) + if (!fchain) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); @@ -1788,8 +1799,11 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) + if (WARN_ON_ONCE(!ext)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } + ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; -- cgit v1.2.3 From 562b1fdf061bff9394ccd884456ed1173c224fdc Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 11 Oct 2023 13:30:44 -0700 Subject: tcp: Set pingpong threshold via sysctl TCP pingpong threshold is 1 by default. But some applications, like SQL DB may prefer a higher pingpong threshold to activate delayed acks in quick ack mode for better performance. The pingpong threshold and related code were changed to 3 in the year 2019 in: commit 4a41f453bedf ("tcp: change pingpong threshold to 3") And reverted to 1 in the year 2022 in: commit 4d8f24eeedc5 ("Revert "tcp: change pingpong threshold to 3"") There is no single value that fits all applications. Add net.ipv4.tcp_pingpong_thresh sysctl tunable, so it can be tuned for optimal performance based on the application needs. 
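Purely as an illustration (not part of the patch), the new knob can be set from userspace through its procfs path, assuming the usual net.ipv4.* to /proc/sys/net/ipv4/ mapping; the snippet below is equivalent to running sysctl -w net.ipv4.tcp_pingpong_thresh=3.

#include <stdio.h>

int main(void)
{
        /* Require three estimated request/response exchanges before the
         * connection is treated as ping-pong, per the documentation text
         * above; valid values are 1-255, default 1.
         */
        FILE *f = fopen("/proc/sys/net/ipv4/tcp_pingpong_thresh", "w");

        if (!f) {
                perror("tcp_pingpong_thresh");
                return 1;
        }
        fprintf(f, "3\n");
        return fclose(f) ? 1 : 0;
}
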
Signed-off-by: Haiyang Zhang Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Acked-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/1697056244-21888-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 13 +++++++++++++ include/net/inet_connection_sock.h | 16 ++++++++++++---- include/net/netns/ipv4.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 4 ++-- 6 files changed, 39 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index f7dfde3b09a9..e7ec9026e5db 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER Default: 128 +tcp_pingpong_thresh - INTEGER + The number of estimated data replies sent for estimated incoming data + requests that must happen before TCP considers that a connection is a + "ping-pong" (request-response) connection for which delayed + acknowledgments can provide benefits. + + This threshold is 1 by default, but some applications may need a higher + threshold for optimal performance. + + Possible Values: 1 - 255 + + Default: 1 + UDP variables ============= diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index d6d9d1c1985a..086d1193c9ef 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -328,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 1 - static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { - inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; + inet_csk(sk)->icsk_ack.pingpong = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); } static inline void inet_csk_exit_pingpong_mode(struct sock *sk) @@ -342,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk) static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { - return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + return inet_csk(sk)->icsk_ack.pingpong >= + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; } static inline bool inet_csk_has_ulp(const struct sock *sk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d96d05b08819..73f43f699199 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,8 @@ struct netns_ipv4 { u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; u8 sysctl_tcp_backlog_ack_defer; + u8 sysctl_tcp_pingpong_thresh; + int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e7f024d93572..f63a545a7374 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "tcp_pingpong_thresh", + .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ONE, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 
a441740616d7..f603ad9307af 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; net->ipv4.sysctl_tcp_shrink_window = 0; + net->ipv4.sysctl_tcp_pingpong_thresh = 1; + return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d5961a82a9e8..8e6ebf35ed58 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp, tp->lsndtime = now; /* If it is a reply for ato after last received - * packet, enter pingpong mode. + * packet, increase pingpong count. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_enter_pingpong_mode(sk); + inet_csk_inc_pingpong_cnt(sk); } /* Account for an ACK we sent. */ -- cgit v1.2.3 From c60991f8e187eb73dbea2375c08ccba8f544bd49 Mon Sep 17 00:00:00 2001 From: Liansen Zhai Date: Thu, 12 Oct 2023 17:03:30 +0800 Subject: cgroup, netclassid: on modifying netclassid in cgroup, only consider the main process. When modifying netclassid, the command("echo 0x100001 > net_cls.classid") will take more time on many threads of one process, because the process create many fds. for example, one process exists 28000 fds and 60000 threads, echo command will task 45 seconds. Now, we only consider the main process when exec "iterate_fd", and the time is about 52 milliseconds. Signed-off-by: Liansen Zhai Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231012090330.29636-1-zhailiansen@kuaishou.com Signed-off-by: Jakub Kicinski --- net/core/netclassid_cgroup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index d6a70aeaa503..d22f0919821e 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -88,6 +88,12 @@ static void update_classid_task(struct task_struct *p, u32 classid) }; unsigned int fd = 0; + /* Only update the leader task, when many threads in this task, + * so it can avoid the useless traversal. + */ + if (p != p->group_leader) + return; + do { task_lock(p); fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); -- cgit v1.2.3 From 90de47f020db086f7929e09f64efd0cf627d6869 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 13 Oct 2023 14:48:21 +0800 Subject: page_pool: fragment API support for 32-bit arch with 64-bit DMA Currently page_pool_alloc_frag() is not supported in 32-bit arch with 64-bit DMA because of the overlap issue between pp_frag_count and dma_addr_upper in 'struct page' for those arches, which seems to be quite common, see [1], which means driver may need to handle it when using fragment API. It is assumed that the combination of the above arch with an address space >16TB does not exist, as all those arches have 64b equivalent, it seems logical to use the 64b version for a system with a large address space. It is also assumed that dma address is page aligned when we are dma mapping a page aligned buffer, see [2]. That means we're storing 12 bits of 0 at the lower end for a dma address, we can reuse those bits for the above arches to support 32b+12b, which is 16TB of memory. If we make a wrong assumption, a warning is emitted so that user can report to us. 1. https://lore.kernel.org/all/20211117075652.58299-1-linyunsheng@huawei.com/ 2. 
https://lore.kernel.org/all/20230818145145.4b357c89@kernel.org/ Tested-by: Alexander Lobakin Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck CC: Liang Chen CC: Guillaume Tucker CC: Matthew Wilcox CC: Linux-MM Link: https://lore.kernel.org/r/20231013064827.61135-2-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/mm_types.h | 13 +------------ include/net/page_pool/helpers.h | 20 ++++++++++++++------ net/core/page_pool.c | 14 +++++++++----- 3 files changed, 24 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e6..74b49c4c7a52 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -125,18 +125,7 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_frag_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 8e7751464ff5..8f64adf86f5b 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -197,7 +197,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ +#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) /** @@ -211,17 +211,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; return ret; } -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { + page->dma_addr = addr >> PAGE_SHIFT; + + /* We assume page alignment to shave off bottom bits, + * if this "compression" doesn't work we need to drop. 
+ */ + return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; + } + page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); + return false; } static inline bool page_pool_put(struct page_pool *pool) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 77cb75e63aca..8a9868ea5067 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -211,10 +211,6 @@ static int page_pool_init(struct page_pool *pool, */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; - #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -359,12 +355,20 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (dma_mapping_error(pool->p.dev, dma)) return false; - page_pool_set_dma_addr(page, dma); + if (page_pool_set_dma_addr(page, dma)) + goto unmap_failed; if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; + +unmap_failed: + WARN_ON_ONCE("unexpected DMA address, please report to netdev@"); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return false; } static void page_pool_set_pp_info(struct page_pool *pool, -- cgit v1.2.3 From 1b2d3b45c1941453703d70f46b70ab8985303b5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Oct 2023 14:02:37 +0200 Subject: net: gso_test: release each segment individually consume_skb() doesn't walk the segment list, so segments other than the first are leaked. Move this skb_consume call into the loop. Cc: Willem de Bruijn Fixes: b3098d32ed6e ("net: add skb_segment kunit test") Signed-off-by: Florian Westphal Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/gso_test.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/gso_test.c b/net/core/gso_test.c index c4b13de6abfb..ceb684be4cbf 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -144,8 +144,8 @@ KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc); static void gso_test_func(struct kunit *test) { const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct sk_buff *skb, *segs, *cur, *next, *last; const struct gso_test_case *tcase; - struct sk_buff *skb, *segs, *cur; netdev_features_t features; struct page *page; int i; @@ -236,7 +236,10 @@ static void gso_test_func(struct kunit *test) goto free_gso_skb; } - for (cur = segs, i = 0; cur; cur = cur->next, i++) { + last = segs->prev; + for (cur = segs, i = 0; cur; cur = next, i++) { + next = cur->next; + KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]); /* segs have skb->data pointing to the mac header */ @@ -247,13 +250,14 @@ static void gso_test_func(struct kunit *test) KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0); /* last seg can be found through segs->prev pointer */ - if (!cur->next) - KUNIT_ASSERT_PTR_EQ(test, cur, segs->prev); + if (!next) + KUNIT_ASSERT_PTR_EQ(test, cur, last); + + consume_skb(cur); } KUNIT_ASSERT_EQ(test, i, tcase->nr_segs); - consume_skb(segs); free_gso_skb: consume_skb(skb); } -- cgit v1.2.3 From df3bf90fef281c630ef06a3d03efb9fe56c8a0fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 14 Oct 2023 08:34:52 +0200 Subject: net: openvswitch: Use struct_size() Use struct_size() instead of hand writing it. 
This is less verbose and more robust. Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e5122b4ff878cbf3ed72653a395ad5c4da04dc1e.1697264974.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- net/openvswitch/flow_table.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4f3b1798e0b2..d108ae0bd0ee 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -220,16 +220,13 @@ static struct mask_array *tbl_mask_array_alloc(int size) struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); - new = kzalloc(sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * size + + new = kzalloc(struct_size(new, masks, size) + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; new->masks_usage_zero_cntr = (u64 *)((u8 *)new + - sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * - size); + struct_size(new, masks, size)); new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + sizeof(u64) * size, -- cgit v1.2.3 From 7713ec844756a9883ba9a91381369256275de4fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 14 Oct 2023 08:34:53 +0200 Subject: net: openvswitch: Annotate struct mask_array with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). Signed-off-by: Christophe JAILLET Reviewed-by: Kees Cook Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ca5c8049f58bb933f231afd0816e30a5aaa0eddd.1697264974.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- net/openvswitch/flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 9e659db78c05..f524dc3e4862 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -48,7 +48,7 @@ struct mask_array { int count, max; struct mask_array_stats __percpu *masks_usage_stats; u64 *masks_usage_zero_cntr; - struct sw_flow_mask __rcu *masks[]; + struct sw_flow_mask __rcu *masks[] __counted_by(max); }; struct table_instance { -- cgit v1.2.3 From 9a675ba55a96a45a9fb69e6a5c43f80c6682e541 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 16 Oct 2023 14:57:38 +0200 Subject: net, bpf: Add a warning if NAPI cb missed xdp_do_flush(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few drivers were missing a xdp_do_flush() invocation after XDP_REDIRECT. Add three helper functions each for one of the per-CPU lists. Return true if the per-CPU list is non-empty and flush the list. Add xdp_do_check_flushed() which invokes each helper functions and creates a warning if one of the functions had a non-empty list. Hide everything behind CONFIG_DEBUG_NET. 
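For context, a hedged sketch of the driver-side pattern the new warning enforces. xdp_do_flush(), napi_complete_done(), XDP_REDIRECT and xdp_do_check_flushed() exist in the tree (the latter added by this patch); the example_* names are placeholders for a driver's own RX handling, not real APIs.

#include <linux/netdevice.h>
#include <linux/filter.h>

/* Placeholder: process one RX frame through the XDP program, returning
 * false when the ring is empty and the XDP_* verdict via *act.
 */
static bool example_rx_one_frame(struct napi_struct *napi, u32 *act);

static int example_napi_poll(struct napi_struct *napi, int budget)
{
        bool need_xdp_flush = false;
        u32 act;
        int work = 0;

        while (work < budget && example_rx_one_frame(napi, &act)) {
                if (act == XDP_REDIRECT)
                        need_xdp_flush = true;
                work++;
        }

        /* Without this, xdp_do_check_flushed() warns under CONFIG_DEBUG_NET
         * that this ->poll left redirected frames queued on the per-CPU
         * devmap/cpumap/xskmap flush lists.
         */
        if (need_xdp_flush)
                xdp_do_flush();

        if (work < budget)
                napi_complete_done(napi, work);

        return work;
}
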
Suggested-by: Jesper Dangaard Brouer Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jakub Kicinski Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20231016125738.Yt79p1uF@linutronix.de --- include/linux/bpf.h | 3 +++ include/net/xdp_sock.h | 9 +++++++++ kernel/bpf/cpumap.c | 10 ++++++++++ kernel/bpf/devmap.c | 10 ++++++++++ net/core/dev.c | 2 ++ net/core/dev.h | 6 ++++++ net/core/filter.c | 16 ++++++++++++++++ net/xdp/xsk.c | 10 ++++++++++ 8 files changed, 66 insertions(+) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d3c51a507508..b4b40b45962b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2478,6 +2478,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); + +bool dev_check_flush(void); +bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 69b472604b86..7dd0df2f6f8e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -109,4 +109,13 @@ static inline void __xsk_map_flush(void) #endif /* CONFIG_XDP_SOCKETS */ +#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) +bool xsk_map_check_flush(void); +#else +static inline bool xsk_map_check_flush(void) +{ + return false; +} +#endif + #endif /* _LINUX_XDP_SOCK_H */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index e42a1bdb7f53..8a0bb80fe48a 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -764,6 +764,16 @@ void __cpu_map_flush(void) } } +#ifdef CONFIG_DEBUG_NET +bool cpu_map_check_flush(void) +{ + if (list_empty(this_cpu_ptr(&cpu_map_flush_list))) + return false; + __cpu_map_flush(); + return true; +} +#endif + static int __init cpu_map_init(void) { int cpu; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 4d42f6ed6c11..a936c704d4e7 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -418,6 +418,16 @@ void __dev_flush(void) } } +#ifdef CONFIG_DEBUG_NET +bool dev_check_flush(void) +{ + if (list_empty(this_cpu_ptr(&dev_flush_list))) + return false; + __dev_flush(); + return true; +} +#endif + /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. 
diff --git a/net/core/dev.c b/net/core/dev.c index 97e7b9833db9..4420831180c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6535,6 +6535,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) if (test_bit(NAPI_STATE_SCHED, &n->state)) { work = n->poll(n, weight); trace_napi_poll(n, work, weight); + + xdp_do_check_flushed(n); } if (unlikely(work > weight)) diff --git a/net/core/dev.h b/net/core/dev.h index e075e198092c..f66125857af7 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -136,4 +136,10 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, } int rps_cpumask_housekeeping(struct cpumask *mask); + +#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) +void xdp_do_check_flushed(struct napi_struct *napi); +#else +static inline void xdp_do_check_flushed(struct napi_struct *napi) { } +#endif #endif diff --git a/net/core/filter.c b/net/core/filter.c index cc2e4babc85f..21d75108c2e9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,8 @@ #include #include +#include "dev.h" + static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -4208,6 +4210,20 @@ void xdp_do_flush(void) } EXPORT_SYMBOL_GPL(xdp_do_flush); +#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) +void xdp_do_check_flushed(struct napi_struct *napi) +{ + bool ret; + + ret = dev_check_flush(); + ret |= cpu_map_check_flush(); + ret |= xsk_map_check_flush(); + + WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", + napi->poll); +} +#endif + void bpf_clear_redirect_map(struct bpf_map *map) { struct bpf_redirect_info *ri; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f5e96e0d6e01..ba070fd37d24 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -391,6 +391,16 @@ void __xsk_map_flush(void) } } +#ifdef CONFIG_DEBUG_NET +bool xsk_map_check_flush(void) +{ + if (list_empty(this_cpu_ptr(&xskmap_flush_list))) + return false; + __xsk_map_flush(); + return true; +} +#endif + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { xskq_prod_submit_n(pool->cq, nb_entries); -- cgit v1.2.3 From cbf51acbc5d50341290c79c97bda8cf46f5c4f22 Mon Sep 17 00:00:00 2001 From: Johannes Nixdorf Date: Mon, 16 Oct 2023 15:27:20 +0200 Subject: net: bridge: Set BR_FDB_ADDED_BY_USER early in fdb_add_entry In preparation of the following fdb limit for dynamically learned entries, allow fdb_create to detect that the entry was added by the user. This way it can skip applying the limit in this case. 
Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Johannes Nixdorf Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-1-32cddff87758@avm.de Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a98ad763b368..7738e1b56452 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1075,7 +1075,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(br, source, addr, vid, 0); + fdb = fdb_create(br, source, addr, vid, + BIT(BR_FDB_ADDED_BY_USER)); if (!fdb) return -ENOMEM; @@ -1088,6 +1089,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, WRITE_ONCE(fdb->dst, source); modified = true; } + + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); } if (fdb_to_nud(br, fdb) != state) { @@ -1119,8 +1122,6 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (fdb_handle_notify(fdb, notify)) modified = true; - set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); - fdb->used = jiffies; if (modified) { if (refresh) -- cgit v1.2.3 From bdb4dfda3b41649c41cc2222857c9207fc47a950 Mon Sep 17 00:00:00 2001 From: Johannes Nixdorf Date: Mon, 16 Oct 2023 15:27:21 +0200 Subject: net: bridge: Track and limit dynamically learned FDB entries A malicious actor behind one bridge port may spam the kernel with packets with a random source MAC address, each of which will create an FDB entry, each of which is a dynamic allocation in the kernel. There are roughly 2^48 different MAC addresses, further limited by the rhashtable they are stored in to 2^31. Each entry is of the type struct net_bridge_fdb_entry, which is currently 128 bytes big. This means the maximum amount of memory allocated for FDB entries is 2^31 * 128B = 256GiB, which is too much for most computers. Mitigate this by maintaining a per bridge count of those automatically generated entries in fdb_n_learned, and a limit in fdb_max_learned. If the limit is hit new entries are not learned anymore. For backwards compatibility the default setting of 0 disables the limit. User-added entries by netlink or from bridge or bridge port addresses are never blocked and do not count towards that limit. Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of whether an FDB entry is included in the count. The flag is enabled for dynamically learned entries, and disabled for all other entries. This should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset, but contrary to the two flags it can be toggled atomically. Atomicity is required here, as there are multiple callers that modify the flags, but are not under a common lock (br_fdb_update is the exception for br->hash_lock, br_fdb_external_learn_add for RTNL). 
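As a condensed illustration (not the patch itself), the admission decision for a would-be BR_FDB_DYNAMIC_LEARNED entry boils down to the check below; example_may_learn() is a made-up name, while fdb_max_learned and fdb_n_learned are the struct net_bridge fields added by this patch, so the sketch assumes it sits next to br_fdb.c with br_private.h available.

/* Entries with BR_FDB_ADDED_BY_USER or BR_FDB_LOCAL bypass this check
 * and are never counted.
 */
static bool example_may_learn(const struct net_bridge *br)
{
        u32 max_learned = READ_ONCE(br->fdb_max_learned);

        /* A limit of 0 keeps the old, unlimited behaviour. */
        if (!max_learned)
                return true;

        return atomic_read(&br->fdb_n_learned) < max_learned;
}

In the real patch the counter is only incremented after the entry has been successfully inserted into the rhashtable, and it is decremented again in fdb_delete() and whenever an entry gains BR_FDB_ADDED_BY_USER.
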
Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Johannes Nixdorf Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 35 +++++++++++++++++++++++++++++++++-- net/bridge/br_private.h | 4 ++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 7738e1b56452..c622de5eccd0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -329,11 +329,18 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, hlist_del_init_rcu(&f->fdb_node); rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, br_fdb_rht_params); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags)) + atomic_dec(&br->fdb_n_learned); fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); call_rcu(&f->rcu, fdb_rcu_free); } -/* Delete a local entry if no other port had the same address. */ +/* Delete a local entry if no other port had the same address. + * + * This function should only be called on entries with BR_FDB_LOCAL set, + * so even with BR_FDB_ADDED_BY_USER cleared we never need to increase + * the accounting for dynamically learned entries again. + */ static void fdb_delete_local(struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_fdb_entry *f) @@ -388,9 +395,20 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, __u16 vid, unsigned long flags) { + bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) && + !test_bit(BR_FDB_LOCAL, &flags); + u32 max_learned = READ_ONCE(br->fdb_max_learned); struct net_bridge_fdb_entry *fdb; int err; + if (likely(learned)) { + int n_learned = atomic_read(&br->fdb_n_learned); + + if (unlikely(max_learned && n_learned >= max_learned)) + return NULL; + __set_bit(BR_FDB_DYNAMIC_LEARNED, &flags); + } + fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (!fdb) return NULL; @@ -407,6 +425,9 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, return NULL; } + if (likely(learned)) + atomic_inc(&br->fdb_n_learned); + hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); return fdb; @@ -912,8 +933,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, clear_bit(BR_FDB_LOCKED, &fdb->flags); } - if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) + if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) { set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, + &fdb->flags)) + atomic_dec(&br->fdb_n_learned); + } if (unlikely(fdb_modified)) { trace_br_fdb_update(br, source, addr, vid, flags); fdb_notify(br, fdb, RTM_NEWNEIGH, true); @@ -1091,6 +1116,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, } set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) + atomic_dec(&br->fdb_n_learned); } if (fdb_to_nud(br, fdb) != state) { @@ -1465,6 +1492,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (!p) set_bit(BR_FDB_LOCAL, &fdb->flags); + if ((swdev_notify || !p) && + test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) + atomic_dec(&br->fdb_n_learned); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cbbe35278459..27a7a06660f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -274,6 +274,7 @@ enum { BR_FDB_NOTIFY, BR_FDB_NOTIFY_INACTIVE, BR_FDB_LOCKED, + 
BR_FDB_DYNAMIC_LEARNED, }; struct net_bridge_fdb_key { @@ -555,6 +556,9 @@ struct net_bridge { struct kobject *ifobj; u32 auto_cnt; + atomic_t fdb_n_learned; + u32 fdb_max_learned; + #ifdef CONFIG_NET_SWITCHDEV /* Counter used to make sure that hardware domains get unique * identifiers in case a bridge spans multiple switchdev instances. -- cgit v1.2.3 From ddd1ad68826d8ff61a2e47733959570aa4d39a16 Mon Sep 17 00:00:00 2001 From: Johannes Nixdorf Date: Mon, 16 Oct 2023 15:27:22 +0200 Subject: net: bridge: Add netlink knobs for number / max learned FDB entries The previous patch added accounting and a limit for the number of dynamically learned FDB entries per bridge. However it did not provide means to actually configure those bounds or read back the count. This patch does that. Two new netlink attributes are added for the accounting and limit of dynamically learned FDB entries: - IFLA_BR_FDB_N_LEARNED (RO) for the number of entries accounted for a single bridge. - IFLA_BR_FDB_MAX_LEARNED (RW) for the configured limit of entries for the bridge. The new attributes are used like this: # ip link add name br up type bridge fdb_max_learned 256 # ip link add name v1 up master br type veth peer v2 # ip link set up dev v2 # mausezahn -a rand -c 1024 v2 0.01 seconds (90877 packets per second # bridge fdb | grep -v permanent | wc -l 256 # ip -d link show dev br 13: br: mtu 1500 [...] [...] fdb_n_learned 256 fdb_max_learned 256 Signed-off-by: Johannes Nixdorf Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-3-32cddff87758@avm.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 2 ++ net/bridge/br_netlink.c | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fac351a93aed..9f8a3da0f14f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -510,6 +510,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_FDB_N_LEARNED, + IFLA_BR_FDB_MAX_LEARNED, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 10f0d33d8ccf..0c3cf6e6dea2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1265,6 +1265,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 }, [IFLA_BR_MULTI_BOOLOPT] = NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), + [IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT }, + [IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1539,6 +1541,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_FDB_MAX_LEARNED]) { + u32 val = nla_get_u32(data[IFLA_BR_FDB_MAX_LEARNED]); + + WRITE_ONCE(br->fdb_max_learned, val); + } + return 0; } @@ -1593,6 +1601,8 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_N_LEARNED */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_MAX_LEARNED */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ @@ -1668,7 +1678,10 @@ static int 
br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED, br->topology_change_detected) || nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) || - nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm)) + nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) || + nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED, + atomic_read(&br->fdb_n_learned)) || + nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_VLAN_FILTERING -- cgit v1.2.3 From 19297c3ab23c4b2fe4abd13a992b7d7d10b07258 Mon Sep 17 00:00:00 2001 From: Johannes Nixdorf Date: Mon, 16 Oct 2023 15:27:23 +0200 Subject: net: bridge: Set strict_start_type for br_policy Set any new attributes added to br_policy to be parsed strictly, to prevent userspace from passing garbage. Signed-off-by: Johannes Nixdorf Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-4-32cddff87758@avm.de Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0c3cf6e6dea2..5ad4abfcb7ba 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1229,6 +1229,8 @@ static size_t br_port_get_slave_size(const struct net_device *brdev, } static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { + [IFLA_BR_UNSPEC] = { .strict_start_type = + IFLA_BR_FDB_N_LEARNED }, [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, -- cgit v1.2.3 From c503bc7df602257e9d03851654a347649a33f3c3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2023 14:10:24 +0200 Subject: devlink: call peernet2id_alloc() with net pointer under RCU read lock peernet2id_alloc() allows to be called lockless with peer net pointer obtained in RCU critical section and makes sure to return ns ID if net namespaces is not being removed concurrently. Benefit from read_pnet_rcu() helper addition, use it to obtain net pointer under RCU read lock and pass it to peernet2id_alloc() to get ns ID. Fixes: c137743bce02 ("devlink: introduce object and nested devlink relationship infra") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/devlink/netlink.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 499304d9de49..809bfc3ba8c4 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -86,18 +86,24 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { struct nlattr *nested_attr; + struct net *devl_net; nested_attr = nla_nest_start(msg, attrtype); if (!nested_attr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; - if (!net_eq(net, devlink_net(devlink))) { - int id = peernet2id_alloc(net, devlink_net(devlink), - GFP_KERNEL); + rcu_read_lock(); + devl_net = read_pnet_rcu(&devlink->_net); + if (!net_eq(net, devl_net)) { + int id = peernet2id_alloc(net, devl_net, GFP_ATOMIC); + + rcu_read_unlock(); if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) return -EMSGSIZE; + } else { + rcu_read_unlock(); } nla_nest_end(msg, nested_attr); -- cgit v1.2.3 From a380687200e0f7f0e00d745796fd8b8ea4bcb746 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2023 14:10:25 +0200 Subject: devlink: take device reference for devlink object In preparation to allow to access device pointer without devlink instance lock held, make sure the device pointer is usable until devlink_release() is called. Fixes: c137743bce02 ("devlink: introduce object and nested devlink relationship infra") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/devlink/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/devlink/core.c b/net/devlink/core.c index bcbbb952569f..c47c9e6c744f 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -310,6 +310,7 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); + put_device(devlink->dev); kfree(devlink); } @@ -425,7 +426,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (ret < 0) goto err_xa_alloc; - devlink->dev = dev; + devlink->dev = get_device(dev); devlink->ops = ops; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); -- cgit v1.2.3 From b5f4e371336a62a48f6ae51abb8366e968a8f88f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2023 14:10:26 +0200 Subject: devlink: don't take instance lock for nested handle put Lockdep reports following issue: WARNING: possible circular locking dependency detected ------------------------------------------------------ devlink/8191 is trying to acquire lock: ffff88813f32c250 (&devlink->lock_key#14){+.+.}-{3:3}, at: devlink_rel_devlink_handle_put+0x11e/0x2d0 but task is already holding lock: ffffffff8511eca8 (rtnl_mutex){+.+.}-{3:3}, at: unregister_netdev+0xe/0x20 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #3 (rtnl_mutex){+.+.}-{3:3}: lock_acquire+0x1c3/0x500 __mutex_lock+0x14c/0x1b20 register_netdevice_notifier_net+0x13/0x30 mlx5_lag_add_mdev+0x51c/0xa00 [mlx5_core] mlx5_load+0x222/0xc70 [mlx5_core] mlx5_init_one_devl_locked+0x4a0/0x1310 [mlx5_core] mlx5_init_one+0x3b/0x60 [mlx5_core] probe_one+0x786/0xd00 [mlx5_core] local_pci_probe+0xd7/0x180 pci_device_probe+0x231/0x720 really_probe+0x1e4/0xb60 __driver_probe_device+0x261/0x470 driver_probe_device+0x49/0x130 __driver_attach+0x215/0x4c0 bus_for_each_dev+0xf0/0x170 bus_add_driver+0x21d/0x590 driver_register+0x133/0x460 vdpa_match_remove+0x89/0xc0 [vdpa] do_one_initcall+0xc4/0x360 do_init_module+0x22d/0x760 load_module+0x51d7/0x6750 init_module_from_file+0xd2/0x130 idempotent_init_module+0x326/0x5a0 __x64_sys_finit_module+0xc1/0x130 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #2 (mlx5_intf_mutex){+.+.}-{3:3}: lock_acquire+0x1c3/0x500 __mutex_lock+0x14c/0x1b20 mlx5_register_device+0x3e/0xd0 [mlx5_core] mlx5_init_one_devl_locked+0x8fa/0x1310 [mlx5_core] mlx5_devlink_reload_up+0x147/0x170 [mlx5_core] devlink_reload+0x203/0x380 devlink_nl_cmd_reload+0xb84/0x10e0 genl_family_rcv_msg_doit+0x1cc/0x2a0 genl_rcv_msg+0x3c9/0x670 netlink_rcv_skb+0x12c/0x360 genl_rcv+0x24/0x40 netlink_unicast+0x435/0x6f0 netlink_sendmsg+0x7a0/0xc70 sock_sendmsg+0xc5/0x190 __sys_sendto+0x1c8/0x290 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #1 (&dev->lock_key#8){+.+.}-{3:3}: lock_acquire+0x1c3/0x500 __mutex_lock+0x14c/0x1b20 mlx5_init_one_devl_locked+0x45/0x1310 [mlx5_core] mlx5_devlink_reload_up+0x147/0x170 [mlx5_core] devlink_reload+0x203/0x380 devlink_nl_cmd_reload+0xb84/0x10e0 genl_family_rcv_msg_doit+0x1cc/0x2a0 genl_rcv_msg+0x3c9/0x670 netlink_rcv_skb+0x12c/0x360 genl_rcv+0x24/0x40 netlink_unicast+0x435/0x6f0 netlink_sendmsg+0x7a0/0xc70 sock_sendmsg+0xc5/0x190 __sys_sendto+0x1c8/0x290 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #0 (&devlink->lock_key#14){+.+.}-{3:3}: check_prev_add+0x1af/0x2300 __lock_acquire+0x31d7/0x4eb0 lock_acquire+0x1c3/0x500 __mutex_lock+0x14c/0x1b20 devlink_rel_devlink_handle_put+0x11e/0x2d0 devlink_nl_port_fill+0xddf/0x1b00 devlink_port_notify+0xb5/0x220 __devlink_port_type_set+0x151/0x510 devlink_port_netdevice_event+0x17c/0x220 notifier_call_chain+0x97/0x240 unregister_netdevice_many_notify+0x876/0x1790 unregister_netdevice_queue+0x274/0x350 unregister_netdev+0x18/0x20 mlx5e_vport_rep_unload+0xc5/0x1c0 [mlx5_core] __esw_offloads_unload_rep+0xd8/0x130 [mlx5_core] mlx5_esw_offloads_rep_unload+0x52/0x70 [mlx5_core] mlx5_esw_offloads_unload_rep+0x85/0xc0 [mlx5_core] mlx5_eswitch_unload_sf_vport+0x41/0x90 [mlx5_core] mlx5_devlink_sf_port_del+0x120/0x280 [mlx5_core] genl_family_rcv_msg_doit+0x1cc/0x2a0 genl_rcv_msg+0x3c9/0x670 netlink_rcv_skb+0x12c/0x360 genl_rcv+0x24/0x40 netlink_unicast+0x435/0x6f0 netlink_sendmsg+0x7a0/0xc70 sock_sendmsg+0xc5/0x190 __sys_sendto+0x1c8/0x290 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 other info that might help us debug this: Chain exists of: &devlink->lock_key#14 --> mlx5_intf_mutex --> rtnl_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(mlx5_intf_mutex); lock(rtnl_mutex); lock(&devlink->lock_key#14); Problem is taking the devlink instance lock of nested instance when RTNL is already held. 
To fix this, don't take the devlink instance lock when putting nested handle. Instead, rely on the preparations done by previous two patches to be able to access device pointer and obtain netns id without devlink instance lock held. Fixes: c137743bce02 ("devlink: introduce object and nested devlink relationship infra") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/devlink/core.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/devlink/core.c b/net/devlink/core.c index c47c9e6c744f..655903ddbdfd 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -183,9 +183,8 @@ static struct devlink_rel *devlink_rel_find(unsigned long rel_index) DEVLINK_REL_IN_USE); } -static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index) +static struct devlink *devlink_rel_devlink_get(u32 rel_index) { - struct devlink *devlink; struct devlink_rel *rel; u32 devlink_index; @@ -198,16 +197,7 @@ static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index) xa_unlock(&devlink_rels); if (!rel) return NULL; - devlink = devlinks_xa_get(devlink_index); - if (!devlink) - return NULL; - devl_lock(devlink); - if (!devl_is_registered(devlink)) { - devl_unlock(devlink); - devlink_put(devlink); - return NULL; - } - return devlink; + return devlinks_xa_get(devlink_index); } int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, @@ -218,11 +208,10 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, struct devlink *rel_devlink; int err; - rel_devlink = devlink_rel_devlink_get_lock(rel_index); + rel_devlink = devlink_rel_devlink_get(rel_index); if (!rel_devlink) return 0; err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype); - devl_unlock(rel_devlink); devlink_put(rel_devlink); if (!err && msg_updated) *msg_updated = true; -- cgit v1.2.3 From 5d77371e8c85abbe0f9fab7dacf3bc2c3214ada5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2023 14:10:29 +0200 Subject: devlink: document devlink_rel_nested_in_notify() function Add a documentation for devlink_rel_nested_in_notify() describing the devlink instance locking consequences. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/devlink/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/devlink/core.c b/net/devlink/core.c index 655903ddbdfd..6984877e9f10 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -168,6 +168,20 @@ int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, return 0; } +/** + * devlink_rel_nested_in_notify - Notify the object this devlink + * instance is nested in. + * @devlink: devlink + * + * This is called upon network namespace change of devlink instance. + * In case this devlink instance is nested in another devlink object, + * a notification of a change of this object should be sent + * over netlink. The parent devlink instance lock needs to be + * taken during the notification preparation. + * However, since the devlink lock of nested instance is held here, + * we would end with wrong devlink instance lock ordering and + * deadlock. Therefore the work is utilized to avoid that. 
+ */ void devlink_rel_nested_in_notify(struct devlink *devlink) { struct devlink_rel *rel = devlink->rel; -- cgit v1.2.3 From e15e5027106f3f6009d2fb46b3a1bb3d9e6a1b77 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:34 +0200 Subject: netfilter: xt_mangle: only check verdict part of return value These checks assume that the caller only returns NF_DROP without any errno embedded in the upper bits. This is fine right now, but followup patches will start to propagate such errors to allow kfree_skb_drop_reason() in the called functions, those would then indicate 'errno << 8 | NF_STOLEN'. To not break things we have to mask those parts out. Signed-off-by: Florian Westphal --- net/ipv4/netfilter/iptable_mangle.c | 9 +++++---- net/ipv6/netfilter/ip6table_mangle.c | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3abb430af9e6..385d945d8ebe 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -36,12 +36,12 @@ static const struct xt_table packet_mangler = { static unsigned int ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; + unsigned int ret, verdict; const struct iphdr *iph; - u_int8_t tos; __be32 saddr, daddr; - u_int32_t mark; + u32 mark; int err; + u8 tos; /* Save things which could affect route */ mark = skb->mark; @@ -51,8 +51,9 @@ ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat tos = iph->tos; ret = ipt_do_table(priv, skb, state); + verdict = ret & NF_VERDICT_MASK; /* Reroute for ANY change. */ - if (ret != NF_DROP && ret != NF_STOLEN) { + if (verdict != NF_DROP && verdict != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a88b2ce4a3cb..8dd4cd0c47bd 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = { static unsigned int ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u_int32_t flowlabel, mark; + unsigned int ret, verdict; + u32 flowlabel, mark; + u8 hop_limit; int err; /* save source/dest address, mark, hoplimit, flowlabel, priority, */ @@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ret = ip6t_do_table(priv, skb, state); + verdict = ret & NF_VERDICT_MASK; - if (ret != NF_DROP && ret != NF_STOLEN && + if (verdict != NF_DROP && verdict != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || -- cgit v1.2.3 From 4d26ab0086aab2d77c54e54020e47737dc6ed165 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:35 +0200 Subject: netfilter: nf_tables: mask out non-verdict bits when checking return value nftables trace infra must mask out the non-verdict bit parts of the return value, else followup changes that 'return errno << 8 | NF_STOLEN' will cause breakage. 
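The masking pattern used here and in the previous xt_mangle patch reduces to the following sketch (illustrative only, not an extra hunk); NF_VERDICT_MASK and NF_DROP_GETERR() are the existing helpers from include/linux/netfilter.h:

	unsigned int ret, verdict;

	ret = ipt_do_table(priv, skb, state);
	verdict = ret & NF_VERDICT_MASK;	/* low bits: NF_ACCEPT, NF_DROP, NF_STOLEN, ... */
	/* an embedded errno, if any, sits above the verdict bits;
	 * NF_DROP_GETERR(ret) turns it back into a negative errno
	 */
	if (verdict != NF_DROP && verdict != NF_STOLEN) {
		/* safe: the comparison no longer sees the errno bits */
	}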
Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nf_tables_trace.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4d0ce12221f6..6009b423f60a 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt, { enum nft_trace_types type; - switch (regs->verdict.code) { + switch (regs->verdict.code & NF_VERDICT_MASK) { case NFT_CONTINUE: case NFT_RETURN: type = NFT_TRACETYPE_RETURN; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 6d41c0bd3d78..a83637e3f455 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt, case __NFT_TRACETYPE_MAX: break; case NFT_TRACETYPE_RETURN: - case NFT_TRACETYPE_RULE: + case NFT_TRACETYPE_RULE: { + unsigned int v; + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict)) goto nla_put_failure; /* pkt->skb undefined iff NF_STOLEN, disable dump */ - if (verdict->code == NF_STOLEN) + v = verdict->code & NF_VERDICT_MASK; + if (v == NF_STOLEN) info->packet_dumped = true; else mark = pkt->skb->mark; break; + } case NFT_TRACETYPE_POLICY: mark = pkt->skb->mark; -- cgit v1.2.3 From 6291b3a67ad55102f163f6a636bc540e460f892d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:36 +0200 Subject: netfilter: conntrack: convert nf_conntrack_update to netfilter verdicts This function calls helpers that can return nf-verdicts, but then those get converted to -1/0 as thats what the caller expects. Theoretically NF_DROP could have an errno number set in the upper 24 bits of the return value. Or any of those helpers could return NF_STOLEN, which would result in use-after-free. This is fine as-is, the called functions don't do this yet. But its better to avoid possible future problems if the upcoming patchset to add NF_DROP_REASON() support gains further users, so remove the 0/-1 translation from the picture and pass the verdicts down to the caller. Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_core.c | 58 ++++++++++++++++++++++----------------- net/netfilter/nfnetlink_queue.c | 15 ++++++---- 2 files changed, 42 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 124136b5a79a..2e5f3864d353 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2169,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); if (dataoff <= 0) - return -1; + return NF_DROP; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, l4num, net, &tuple)) - return -1; + return NF_DROP; if (ct->status & IPS_SRC_NAT) { memcpy(tuple.src.u3.all, @@ -2193,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); if (!h) - return 0; + return NF_ACCEPT; /* Store status bits of the conntrack that is clashing to re-do NAT * mangling according to what it has been done already to this packet. 
@@ -2206,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, nat_hook = rcu_dereference(nf_nat_hook); if (!nat_hook) - return 0; + return NF_ACCEPT; - if (status & IPS_SRC_NAT && - nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, - IP_CT_DIR_ORIGINAL) == NF_DROP) - return -1; + if (status & IPS_SRC_NAT) { + unsigned int verdict = nat_hook->manip_pkt(skb, ct, + NF_NAT_MANIP_SRC, + IP_CT_DIR_ORIGINAL); + if (verdict != NF_ACCEPT) + return verdict; + } - if (status & IPS_DST_NAT && - nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, - IP_CT_DIR_ORIGINAL) == NF_DROP) - return -1; + if (status & IPS_DST_NAT) { + unsigned int verdict = nat_hook->manip_pkt(skb, ct, + NF_NAT_MANIP_DST, + IP_CT_DIR_ORIGINAL); + if (verdict != NF_ACCEPT) + return verdict; + } - return 0; + return NF_ACCEPT; } /* This packet is coming from userspace via nf_queue, complete the packet @@ -2233,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, help = nfct_help(ct); if (!help) - return 0; + return NF_ACCEPT; helper = rcu_dereference(help->helper); if (!helper) - return 0; + return NF_ACCEPT; if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) - return 0; + return NF_ACCEPT; switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: @@ -2255,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) - return 0; + return NF_ACCEPT; break; } #endif default: - return 0; + return NF_ACCEPT; } if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return -1; + return NF_DROP; } } /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb) == NF_DROP ? 
- 1 : 0; + return nf_conntrack_confirm(skb); } static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { enum ip_conntrack_info ctinfo; struct nf_conn *ct; - int err; ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return NF_ACCEPT; if (!nf_ct_is_confirmed(ct)) { - err = __nf_conntrack_update(net, skb, ct, ctinfo); - if (err < 0) - return err; + int ret = __nf_conntrack_update(net, skb, ct, ctinfo); + + if (ret != NF_ACCEPT) + return ret; ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return NF_ACCEPT; } return nf_confirm_cthelper(skb, ct, ctinfo); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 556bc902af00..171d1f52d3dd 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -228,19 +228,22 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_ct_hook *ct_hook; - int err; if (verdict == NF_ACCEPT || verdict == NF_REPEAT || verdict == NF_STOP) { rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); - if (ct_hook) { - err = ct_hook->update(entry->state.net, entry->skb); - if (err < 0) - verdict = NF_DROP; - } + if (ct_hook) + verdict = ct_hook->update(entry->state.net, entry->skb); rcu_read_unlock(); + + switch (verdict & NF_VERDICT_MASK) { + case NF_STOLEN: + nf_queue_entry_free(entry); + return; + } + } nf_reinject(entry, verdict); } -- cgit v1.2.3 From 35c038b0a4be197679deefaf96998241cb7efc88 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:37 +0200 Subject: netfilter: nf_nat: mask out non-verdict bits when checking return value Same as previous change: we need to mask out the non-verdict bits, as upcoming patches may embed an errno value in NF_STOLEN verdicts too. NF_DROP could already do this, but not all called functions do this. Checks that only test ret vs NF_ACCEPT are fine, the 'errno parts' are always 0 for those. Signed-off-by: Florian Westphal --- net/netfilter/nf_nat_proto.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 5a049740758f..6d969468c779 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -999,11 +999,12 @@ static unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; + unsigned int ret, verdict; struct in6_addr daddr = ipv6_hdr(skb)->daddr; ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && + verdict = ret & NF_VERDICT_MASK; + if (verdict != NF_DROP && verdict != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); -- cgit v1.2.3 From e0d4593140b01b8da513a0c88c26da28b4906413 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:38 +0200 Subject: netfilter: make nftables drops visible in net dropmonitor net_dropmonitor blames core.c:nf_hook_slow. Add NF_DROP_REASON() helper and use it in nft_do_chain(). The helper releases the skb, so exact drop location becomes available. Calling code will observe the NF_STOLEN verdict instead. Adjust nf_hook_slow so we can embed an erro value wih NF_STOLEN verdicts, just like we do for NF_DROP. After this, drop in nftables can be pinpointed to a drop due to a rule or the chain policy. 
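Taken together with the nf_hook_slow() change, the round trip is roughly as follows (a condensed reading of the hunks below, not additional code):

	/* in nft_do_chain(): the packet is freed right at the drop point, with a precise reason */
	return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
	/* the helper does kfree_skb_reason(skb, reason) and returns (EPERM << 16) | NF_STOLEN */

	/* in nf_hook_slow(): the stolen verdict hands the packed errno back to the caller */
	case NF_STOLEN:
		return NF_DROP_GETERR(verdict);		/* yields -EPERM for the example above */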
Signed-off-by: Florian Westphal --- include/linux/netfilter.h | 10 ++++++++++ net/netfilter/core.c | 6 +++--- net/netfilter/nf_tables_core.c | 6 +++++- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d68644b7c299..80900d910992 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -22,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict) return -(verdict >> NF_VERDICT_QBITS); } +static __always_inline int +NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) +{ + BUILD_BUG_ON(err > 0xffff); + + kfree_skb_reason(skb, reason); + + return ((err << 16) | NF_STOLEN); +} + static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ef4e76e5aef9..3126911f5042 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, if (ret == 1) continue; return ret; + case NF_STOLEN: + return NF_DROP_GETERR(verdict); default: - /* Implicit handling for NF_STOLEN, as well as any other - * non conventional verdicts. - */ + WARN_ON_ONCE(1); return 0; } } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 6009b423f60a..8b536d7ef6c2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -308,10 +308,11 @@ next_rule: switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: - case NF_DROP: case NF_QUEUE: case NF_STOLEN: return regs.verdict.code; + case NF_DROP: + return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); } switch (regs.verdict.code) { @@ -342,6 +343,9 @@ next_rule: if (static_branch_unlikely(&nft_counters_enabled)) nft_update_chain_stats(basechain, pkt); + if (nft_base_chain(basechain)->policy == NF_DROP) + return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); + return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); -- cgit v1.2.3 From cf8b7c1a5be7ef2850c46a17fea5f867f71922ff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Oct 2023 09:59:39 +0200 Subject: netfilter: bridge: convert br_netfilter to NF_DROP_REASON errno is 0 because these hooks are called from prerouting and forward. There is no socket that the errno would ever be propagated to. Other netfilter modules (e.g. nf_nat, conntrack, ...) can be converted in a similar way. 
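Such a conversion elsewhere would follow the same mechanical pattern as the hunks below; a purely hypothetical example (the hook and reason pairing are invented for illustration, not part of this series):

	-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
	-		return NF_DROP;
	+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
	+		return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);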
Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_hooks.c | 26 +++++++++++++------------- net/bridge/br_netfilter_ipv6.c | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 033034d68f1f..4c0c9f838f5c 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -486,11 +486,11 @@ static unsigned int br_nf_pre_routing(void *priv, struct brnf_net *brnet; if (unlikely(!pskb_may_pull(skb, len))) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); p = br_port_get_rcu(state->in); if (p == NULL) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); br = p->br; brnet = net_generic(state->net, brnf_net_id); @@ -501,7 +501,7 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_ACCEPT; if (!ipv6_mod_enabled()) { pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0); } nf_bridge_pull_encap_header_rcsum(skb); @@ -518,12 +518,12 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge_pull_encap_header_rcsum(skb); if (br_validate_ipv4(state->net, skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); if (!nf_bridge_alloc(skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); if (!setup_pre_routing(skb, state->net)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge = nf_bridge_info_get(skb); nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; @@ -590,15 +590,15 @@ static unsigned int br_nf_forward_ip(void *priv, /* Need exclusive nf_bridge_info since we might have multiple * different physoutdevs. 
*/ if (!nf_bridge_unshare(skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); parent = bridge_parent(state->out); if (!parent) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) @@ -618,13 +618,13 @@ static unsigned int br_nf_forward_ip(void *priv, if (pf == NFPROTO_IPV4) { if (br_validate_ipv4(state->net, skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } if (pf == NFPROTO_IPV6) { if (br_validate_ipv6(state->net, skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; } @@ -666,7 +666,7 @@ static unsigned int br_nf_forward_arp(void *priv, } if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); if (arp_hdr(skb)->ar_pln != 4) { if (is_vlan_arp(skb, state->net)) @@ -831,7 +831,7 @@ static unsigned int br_nf_post_routing(void *priv, return NF_ACCEPT; if (!realoutdev) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 550039dfc31a..2e24a743f917 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -161,13 +161,13 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, struct nf_bridge_info *nf_bridge; if (br_validate_ipv6(state->net, skb)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); nf_bridge = nf_bridge_alloc(skb); if (!nf_bridge) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); if (!setup_pre_routing(skb, state->net)) - return NF_DROP; + return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge = nf_bridge_info_get(skb); nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; -- cgit v1.2.3 From 256001672153af5786c6ca148114693d7d76d836 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Oct 2023 14:18:14 +0200 Subject: netfilter: nf_tables: de-constify set commit ops function argument The set backend using this already has to work around this via ugly cast, don't spread this pattern. 
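The workaround being referred to is the cast-away-const idiom that the pipapo hunk below removes, i.e.:

	/* old prototype passed the set as const, so the backend had to cast constness away */
	struct nft_set *set = (struct nft_set *)_set;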
Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nft_set_pipapo.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9fb16485d08f..8de040d2d2cf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -462,7 +462,7 @@ struct nft_set_ops { const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - void (*commit)(const struct nft_set *set); + void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c0dcc40de358..75a9dee353e2 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1549,12 +1549,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @_set: nftables API set representation + * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) +static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { - struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); int rules_f0, first_rule = 0; @@ -1672,7 +1671,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void nft_pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; -- cgit v1.2.3 From c4eee56e14fe001e1cff54f0b438a5e2d0dd7454 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 17 Oct 2023 11:39:06 +0200 Subject: net: skb_find_text: Ignore patterns extending past 'to' Assume that caller's 'to' offset really represents an upper boundary for the pattern search, so patterns extending past this offset are to be rejected. The old behaviour also was kind of inconsistent when it comes to fragmentation (or otherwise non-linear skbs): If the pattern started in between 'to' and 'from' offsets but extended to the next fragment, it was not found if 'to' offset was still within the current fragment. Test the new behaviour in a kselftest using iptables' string match. Suggested-by: Pablo Neira Ayuso Fixes: f72b948dcbb8 ("[NET]: skb_find_text ignores to argument") Signed-off-by: Phil Sutter Reviewed-by: Florian Westphal Reviewed-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 3 +- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/xt_string.sh | 128 +++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/netfilter/xt_string.sh (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0401f40973a5..975c9a6ffb4a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4267,6 +4267,7 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config) { + unsigned int patlen = config->ops->get_pattern_len(config); struct ts_state state; unsigned int ret; @@ -4278,7 +4279,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); ret = textsearch_find(config, &state); - return (ret <= to - from ? ret : UINT_MAX); + return (ret + patlen <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index ef90aca4cc96..bced422b78f7 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -7,7 +7,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh + conntrack_sctp_collision.sh xt_string.sh HOSTPKG_CONFIG := pkg-config diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh new file mode 100755 index 000000000000..1802653a4728 --- /dev/null +++ b/tools/testing/selftests/netfilter/xt_string.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 +rc=0 + +if ! iptables --version >/dev/null 2>&1; then + echo "SKIP: Test needs iptables" + exit $ksft_skip +fi +if ! ip -V >/dev/null 2>&1; then + echo "SKIP: Test needs iproute2" + exit $ksft_skip +fi +if ! nc -h >/dev/null 2>&1; then + echo "SKIP: Test needs netcat" + exit $ksft_skip +fi + +pattern="foo bar baz" +patlen=11 +hdrlen=$((20 + 8)) # IPv4 + UDP +ns="ns-$(mktemp -u XXXXXXXX)" +trap 'ip netns del $ns' EXIT +ip netns add "$ns" +ip -net "$ns" link add d0 type dummy +ip -net "$ns" link set d0 up +ip -net "$ns" addr add 10.1.2.1/24 dev d0 + +#ip netns exec "$ns" tcpdump -npXi d0 & +#tcpdump_pid=$! 
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT + +add_rule() { # (alg, from, to) + ip netns exec "$ns" \ + iptables -A OUTPUT -o d0 -m string \ + --string "$pattern" --algo $1 --from $2 --to $3 +} +showrules() { # () + ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A' +} +zerorules() { + ip netns exec "$ns" iptables -Z OUTPUT +} +countrule() { # (pattern) + showrules | grep -c -- "$*" +} +send() { # (offset) + ( for ((i = 0; i < $1 - $hdrlen; i++)); do + printf " " + done + printf "$pattern" + ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374 +} + +add_rule bm 1000 1500 +add_rule bm 1400 1600 +add_rule kmp 1000 1500 +add_rule kmp 1400 1600 + +zerorules +send 0 +send $((1000 - $patlen)) +if [ $(countrule -c 0 0) -ne 4 ]; then + echo "FAIL: rules match data before --from" + showrules + ((rc--)) +fi + +zerorules +send 1000 +send $((1400 - $patlen)) +if [ $(countrule -c 2) -ne 2 ]; then + echo "FAIL: only two rules should match at low offset" + showrules + ((rc--)) +fi + +zerorules +send $((1500 - $patlen)) +if [ $(countrule -c 1) -ne 4 ]; then + echo "FAIL: all rules should match at end of packet" + showrules + ((rc--)) +fi + +zerorules +send 1495 +if [ $(countrule -c 1) -ne 1 ]; then + echo "FAIL: only kmp with proper --to should match pattern spanning fragments" + showrules + ((rc--)) +fi + +zerorules +send 1500 +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match pattern at start of second fragment" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen)) +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match pattern at end of largest --to" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen + 1)) +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rules should match pattern extending largest --to" + showrules + ((rc--)) +fi + +zerorules +send 1600 +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rule should match pattern past largest --to" + showrules + ((rc--)) +fi + +exit $rc -- cgit v1.2.3 From 878d951c6712b655c38e78ac1ee63c35cd913b22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2023 09:00:13 +0000 Subject: inet: lock the socket in ip_sock_set_tos() Christoph Paasch reported a panic in TCP stack [1] Indeed, we should not call sk_dst_reset() without holding the socket lock, as __sk_dst_get() callers do not all rely on bare RCU. 
[1] BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 12bad6067 P4D 12bad6067 PUD 12bad5067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 1 PID: 2750 Comm: syz-executor.5 Not tainted 6.6.0-rc4-g7a5720a344e7 #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:tcp_get_metrics+0x118/0x8f0 net/ipv4/tcp_metrics.c:321 Code: c7 44 24 70 02 00 8b 03 89 44 24 48 c7 44 24 4c 00 00 00 00 66 c7 44 24 58 02 00 66 ba 02 00 b1 01 89 4c 24 04 4c 89 7c 24 10 <49> 8b 0f 48 8b 89 50 05 00 00 48 89 4c 24 30 33 81 00 02 00 00 69 RSP: 0018:ffffc90000af79b8 EFLAGS: 00010293 RAX: 000000000100007f RBX: ffff88812ae8f500 RCX: ffff88812b5f8f01 RDX: 0000000000000002 RSI: ffffffff8300f080 RDI: 0000000000000002 RBP: 0000000000000002 R08: 0000000000000003 R09: ffffffff8205eca0 R10: 0000000000000002 R11: ffff88812b5f8f00 R12: ffff88812a9e0580 R13: 0000000000000000 R14: ffff88812ae8fbd2 R15: 0000000000000000 FS: 00007f70a006b640(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000012bad7003 CR4: 0000000000170ee0 Call Trace: tcp_fastopen_cache_get+0x32/0x140 net/ipv4/tcp_metrics.c:567 tcp_fastopen_cookie_check+0x28/0x180 net/ipv4/tcp_fastopen.c:419 tcp_connect+0x9c8/0x12a0 net/ipv4/tcp_output.c:3839 tcp_v4_connect+0x645/0x6e0 net/ipv4/tcp_ipv4.c:323 __inet_stream_connect+0x120/0x590 net/ipv4/af_inet.c:676 tcp_sendmsg_fastopen+0x2d6/0x3a0 net/ipv4/tcp.c:1021 tcp_sendmsg_locked+0x1957/0x1b00 net/ipv4/tcp.c:1073 tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1336 __sock_sendmsg+0x83/0xd0 net/socket.c:730 __sys_sendto+0x20a/0x2a0 net/socket.c:2194 __do_sys_sendto net/socket.c:2206 [inline] Fixes: e08d0b3d1723 ("inet: implement lockless IP_TOS") Reported-by: Christoph Paasch Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231018090014.345158-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 1 + net/ipv4/ip_sockglue.c | 11 +++++++++-- net/mptcp/sockopt.c | 4 ++-- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 6fbc0dcf4b97..1fc4c8d69e33 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -810,5 +810,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0b74ac49d6a6..9c68b6b74d9f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -585,9 +585,9 @@ out: return err; } -void ip_sock_set_tos(struct sock *sk, int val) +void __ip_sock_set_tos(struct sock *sk, int val) { - u8 old_tos = READ_ONCE(inet_sk(sk)->tos); + u8 old_tos = inet_sk(sk)->tos; if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -599,6 +599,13 @@ void ip_sock_set_tos(struct sock *sk, int val) sk_dst_reset(sk); } } + +void ip_sock_set_tos(struct sock *sk, int val) +{ + lock_sock(sk); + __ip_sock_set_tos(sk, val); + release_sock(sk); +} EXPORT_SYMBOL(ip_sock_set_tos); void ip_sock_set_freebind(struct sock *sk) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 18ce624bfde2..59bd5e114392 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -738,7 +738,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 
mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - ip_sock_set_tos(ssk, val); + __ip_sock_set_tos(ssk, val); } release_sock(sk); @@ -1411,7 +1411,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; ssk->sk_ipv6only = sk->sk_ipv6only; - ip_sock_set_tos(ssk, inet_sk(sk)->tos); + __ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 61a2a1988ce6..b1fc8afd072d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -716,7 +716,7 @@ run_test_transparent() # the required infrastructure in MPTCP sockopt code. To support TOS, the # following function has been exported (T). Not great but better than # checking for a specific kernel version. - if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then echo "INFO: ${msg} not supported by the kernel: SKIP" mptcp_lib_result_skip "${TEST_GROUP}" return -- cgit v1.2.3 From 7f3eb2174512fe6c9c0f062e96eccb0d3cc6d5cd Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 18 Oct 2023 14:35:47 +0200 Subject: net: introduce napi_is_scheduled helper We currently have napi_if_scheduled_mark_missed that can be used to check if napi is scheduled but that does more thing than simply checking it and return a bool. Some driver already implement custom function to check if napi is scheduled. Drop these custom function and introduce napi_is_scheduled that simply check if napi is scheduled atomically. Update any driver and code that implement a similar check and instead use this new helper. Signed-off-by: Christian Marangi Signed-off-by: Paolo Abeni --- drivers/net/ethernet/chelsio/cxgb3/sge.c | 8 -------- drivers/net/wireless/realtek/rtw89/core.c | 2 +- include/linux/netdevice.h | 23 +++++++++++++++++++++++ net/core/dev.c | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index dfe4e0102960..6268f96cb4aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2501,14 +2501,6 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) return work_done; } -/* - * Returns true if the device is already scheduled for polling. - */ -static inline int napi_is_scheduled(struct napi_struct *napi) -{ - return test_bit(NAPI_STATE_SCHED, &napi->state); -} - /** * process_pure_responses - process pure responses from a response queue * @adap: the adapter diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 4bfb4188de72..3d75165e48be 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2005,7 +2005,7 @@ static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev, struct napi_struct *napi = &rtwdev->napi; /* In low power mode, napi isn't scheduled. Receive it to netif. 
*/ - if (unlikely(!test_bit(NAPI_STATE_SCHED, &napi->state))) + if (unlikely(!napi_is_scheduled(napi))) napi = NULL; rtw89_core_hw_to_sband_rate(rx_status); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c7681263d30..b8bf669212cc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -482,6 +482,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } +/** + * napi_is_scheduled - test if NAPI is scheduled + * @n: NAPI context + * + * This check is "best-effort". With no locking implemented, + * a NAPI can be scheduled or terminate right after this check + * and produce not precise results. + * + * NAPI_STATE_SCHED is an internal state, napi_is_scheduled + * should not be used normally and napi_schedule should be + * used instead. + * + * Use only if the driver really needs to check if a NAPI + * is scheduled for example in the context of delayed timer + * that can be skipped if a NAPI is already scheduled. + * + * Return True if NAPI is scheduled, False otherwise. + */ +static inline bool napi_is_scheduled(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_SCHED, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** diff --git a/net/core/dev.c b/net/core/dev.c index 97e7b9833db9..e7f61f5a5322 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6532,7 +6532,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { + if (napi_is_scheduled(n)) { work = n->poll(n, weight); trace_napi_poll(n, work, weight); } -- cgit v1.2.3 From db80d3b2558fcc6d18fbcb1452cdf6df65cec151 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 18 Oct 2023 22:26:37 +0200 Subject: devlink: retain error in struct devlink_fmsg Retain error value in struct devlink_fmsg, to relieve drivers from checking it after each call. Note that fmsg is an in-memory builder/buffer of formatted message, so it's not the case that half baked message was sent somewhere. We could find following scheme in multiple drivers: err = devlink_fmsg_obj_nest_start(fmsg); if (err) return err; err = devlink_fmsg_string_pair_put(fmsg, "src", src); if (err) return err; err = devlink_fmsg_something(fmsg, foo, bar); if (err) return err; // and so on... err = devlink_fmsg_obj_nest_end(fmsg); With retaining error API that translates to: devlink_fmsg_obj_nest_start(fmsg); devlink_fmsg_string_pair_put(fmsg, "src", src); devlink_fmsg_something(fmsg, foo, bar); // and so on... devlink_fmsg_obj_nest_end(fmsg); What means we check error just when is time to send. Possible error scenarios are developer error (API misuse) and memory exhaustion, both cases are good candidates to choose readability over fastest possible exit. Note that this patch keeps returning errors, to allow per-driver conversion to the new API, but those are not needed at this point already. This commit itself is an illustration of benefits for the dev-user, more of it will be in separate commits of the series. Reviewed-by: Jesse Brandeburg Reviewed-by: Jiri Pirko Signed-off-by: Przemek Kitszel Signed-off-by: David S. 
Miller --- net/devlink/health.c | 247 ++++++++++++++++----------------------------------- 1 file changed, 76 insertions(+), 171 deletions(-) (limited to 'net') diff --git a/net/devlink/health.c b/net/devlink/health.c index 51e6e81e31bb..3858a436598e 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -19,6 +19,7 @@ struct devlink_fmsg_item { struct devlink_fmsg { struct list_head item_list; + int err; /* first error encountered on some devlink_fmsg_XXX() call */ bool putting_binary; /* This flag forces enclosing of binary data * in an array brackets. It forces using * of designated API: @@ -562,10 +563,8 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, return 0; reporter->dump_fmsg = devlink_fmsg_alloc(); - if (!reporter->dump_fmsg) { - err = -ENOMEM; - return err; - } + if (!reporter->dump_fmsg) + return -ENOMEM; err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); if (err) @@ -670,14 +669,24 @@ int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, return devlink_health_reporter_recover(reporter, NULL, info->extack); } -static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, - int attrtype) +static void devlink_fmsg_err_if_binary(struct devlink_fmsg *fmsg) +{ + if (!fmsg->err && fmsg->putting_binary) + fmsg->err = -EINVAL; +} + +static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int attrtype) { struct devlink_fmsg_item *item; + if (fmsg->err) + return fmsg->err; + item = kzalloc(sizeof(*item), GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return fmsg->err; + } item->attrtype = attrtype; list_add_tail(&item->list, &fmsg->item_list); @@ -687,26 +696,19 @@ static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); } int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - return devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); @@ -717,15 +719,20 @@ static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) { struct devlink_fmsg_item *item; - if (fmsg->putting_binary) - return -EINVAL; + devlink_fmsg_err_if_binary(fmsg); + if (fmsg->err) + return fmsg->err; - if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) { + fmsg->err = -EMSGSIZE; + return fmsg->err; + } item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return fmsg->err; + } item->nla_type = NLA_NUL_STRING; item->len = strlen(name) + 1; @@ -738,28 +745,14 @@ static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); - if (err) - return err; - - err = devlink_fmsg_put_name(fmsg, name); - if (err) - return err; - - return 0; + devlink_fmsg_err_if_binary(fmsg); + 
devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); + return devlink_fmsg_put_name(fmsg, name); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - return devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); @@ -767,39 +760,15 @@ EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_nest_end(fmsg); + return devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); @@ -813,14 +782,19 @@ int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, return err; fmsg->putting_binary = true; - return err; + return 0; } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) { - if (!fmsg->putting_binary) - return -EINVAL; + if (fmsg->err) + return fmsg->err; + + if (!fmsg->putting_binary) { + fmsg->err = -EINVAL; + return fmsg->err; + } fmsg->putting_binary = false; return devlink_fmsg_arr_pair_nest_end(fmsg); @@ -833,12 +807,16 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, { struct devlink_fmsg_item *item; - if (value_len > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; + if (value_len > DEVLINK_FMSG_MAX_SIZE) { + fmsg->err = -EMSGSIZE; + return fmsg->err; + } item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return fmsg->err; + } item->nla_type = value_nla_type; item->len = value_len; @@ -851,42 +829,32 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { - if (fmsg->putting_binary) - return -EINVAL; - + devlink_fmsg_err_if_binary(fmsg); return devlink_fmsg_put_value(fmsg, value, 
strlen(value) + 1, NLA_NUL_STRING); } @@ -905,105 +873,45 @@ EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, bool value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_bool_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_bool_put(fmsg, value); + return devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, u8 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u8_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u8_put(fmsg, value); + return devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, u32 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u32_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u32_put(fmsg, value); + return devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, u64 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u64_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u64_put(fmsg, value); + return devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, const char *value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_string_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_string_put(fmsg, value); + return devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); @@ -1011,7 +919,6 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, const void *value, u32 value_len) { u32 data_size; - int end_err; u32 offset; int err; @@ -1027,14 +934,12 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, if (err) break; /* Exit from loop with a break (instead of - * return) to make sure putting_binary is turned off in - * devlink_fmsg_binary_pair_nest_end + * return) to make sure putting_binary is turned off */ } - end_err = devlink_fmsg_binary_pair_nest_end(fmsg); - if (end_err) - err = end_err; + err = devlink_fmsg_binary_pair_nest_end(fmsg); + fmsg->putting_binary = false; return err; } -- cgit v1.2.3 From 0050629cd36a58b568ac0aebeeca60bd2fde3d6d Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 18 Oct 2023 22:26:47 +0200 Subject: devlink: convert most of devlink_fmsg_*() to return void Since struct devlink_fmsg 
retains error by now (see 1st patch of this series), there is no longer need to keep returning it in each call. This is a separate commit to allow per-driver conversion to stop using those return values. Reviewed-by: Jiri Pirko Signed-off-by: Przemek Kitszel Signed-off-by: David S. Miller --- include/net/devlink.h | 60 ++++++++-------- net/devlink/health.c | 188 ++++++++++++++++++++++---------------------------- 2 files changed, 114 insertions(+), 134 deletions(-) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index fad8e36e3d98..9ac394bdfbe4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1854,36 +1854,36 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_value, enum devlink_info_version_type version_type); -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); -int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len); - -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value); -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value); -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value); -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value); -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value); -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u32 value_len); +void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); +void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); +void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); +void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); +void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); + +void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value); +void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value); +void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value); +void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value); +void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value); +void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char 
*name, + const void *value, u32 value_len); struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, diff --git a/net/devlink/health.c b/net/devlink/health.c index 3858a436598e..89405e59f45c 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -566,16 +566,15 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, if (!reporter->dump_fmsg) return -ENOMEM; - err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); - if (err) - goto dump_err; + devlink_fmsg_obj_nest_start(reporter->dump_fmsg); err = reporter->ops->dump(reporter, reporter->dump_fmsg, priv_ctx, extack); if (err) goto dump_err; - err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + err = reporter->dump_fmsg->err; if (err) goto dump_err; @@ -675,63 +674,61 @@ static void devlink_fmsg_err_if_binary(struct devlink_fmsg *fmsg) fmsg->err = -EINVAL; } -static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int attrtype) +static void devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int attrtype) { struct devlink_fmsg_item *item; if (fmsg->err) - return fmsg->err; + return; item = kzalloc(sizeof(*item), GFP_KERNEL); if (!item) { fmsg->err = -ENOMEM; - return fmsg->err; + return; } item->attrtype = attrtype; list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); -static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) +static void devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); } -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) { - return devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); #define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) -static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) +static void devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) { struct devlink_fmsg_item *item; devlink_fmsg_err_if_binary(fmsg); if (fmsg->err) - return fmsg->err; + return; if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) { fmsg->err = -EMSGSIZE; - return fmsg->err; + return; } item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); if (!item) { fmsg->err = -ENOMEM; - return fmsg->err; + return; } item->nla_type = NLA_NUL_STRING; @@ -739,83 +736,76 @@ static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; memcpy(&item->value, name, item->len); list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) { devlink_fmsg_err_if_binary(fmsg); devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); - return devlink_fmsg_put_name(fmsg, name); + devlink_fmsg_put_name(fmsg, name); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); -int devlink_fmsg_pair_nest_end(struct 
devlink_fmsg *fmsg) +void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) { - return devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) +void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) { devlink_fmsg_pair_nest_start(fmsg, name); - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) { devlink_fmsg_nest_end(fmsg); - return devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) +void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) { - int err; - - err = devlink_fmsg_arr_pair_nest_start(fmsg, name); - if (err) - return err; - + devlink_fmsg_arr_pair_nest_start(fmsg, name); fmsg->putting_binary = true; - return 0; } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) { if (fmsg->err) - return fmsg->err; + return; - if (!fmsg->putting_binary) { + if (!fmsg->putting_binary) fmsg->err = -EINVAL; - return fmsg->err; - } fmsg->putting_binary = false; - return devlink_fmsg_arr_pair_nest_end(fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end); -static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, - const void *value, u16 value_len, - u8 value_nla_type) +static void devlink_fmsg_put_value(struct devlink_fmsg *fmsg, + const void *value, u16 value_len, + u8 value_nla_type) { struct devlink_fmsg_item *item; + if (fmsg->err) + return; + if (value_len > DEVLINK_FMSG_MAX_SIZE) { fmsg->err = -EMSGSIZE; - return fmsg->err; + return; } item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); if (!item) { fmsg->err = -ENOMEM; - return fmsg->err; + return; } item->nla_type = value_nla_type; @@ -823,125 +813,113 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; memcpy(&item->value, value, item->len); list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +static void devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } -static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +static void devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); -static int 
devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +static void devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { devlink_fmsg_err_if_binary(fmsg); - return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, - NLA_NUL_STRING); + devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, NLA_NUL_STRING); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) +void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) { - if (!fmsg->putting_binary) - return -EINVAL; + if (!fmsg->err && !fmsg->putting_binary) + fmsg->err = -EINVAL; - return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); + devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value) +void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) { devlink_fmsg_pair_nest_start(fmsg, name); devlink_fmsg_bool_put(fmsg, value); - return devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) +void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) { devlink_fmsg_pair_nest_start(fmsg, name); devlink_fmsg_u8_put(fmsg, value); - return devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) +void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) { devlink_fmsg_pair_nest_start(fmsg, name); devlink_fmsg_u32_put(fmsg, value); - return devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) +void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) { devlink_fmsg_pair_nest_start(fmsg, name); devlink_fmsg_u64_put(fmsg, value); - return devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) +void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) { devlink_fmsg_pair_nest_start(fmsg, name); devlink_fmsg_string_put(fmsg, value); - return devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u32 value_len) +void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u32 value_len) { u32 data_size; u32 offset; - int err; - err = devlink_fmsg_binary_pair_nest_start(fmsg, name); - if (err) - return err; + devlink_fmsg_binary_pair_nest_start(fmsg, name); for (offset = 0; offset < 
value_len; offset += data_size) { data_size = value_len - offset; if (data_size > DEVLINK_FMSG_MAX_SIZE) data_size = DEVLINK_FMSG_MAX_SIZE; - err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); - if (err) - break; - /* Exit from loop with a break (instead of - * return) to make sure putting_binary is turned off - */ + + devlink_fmsg_binary_put(fmsg, value + offset, data_size); } - err = devlink_fmsg_binary_pair_nest_end(fmsg); + devlink_fmsg_binary_pair_nest_end(fmsg); fmsg->putting_binary = false; - - return err; } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); @@ -1051,6 +1029,9 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, void *hdr; int err; + if (fmsg->err) + return fmsg->err; + while (!last) { int tmp_index = index; @@ -1104,6 +1085,9 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, void *hdr; int err; + if (fmsg->err) + return fmsg->err; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd); if (!hdr) { @@ -1143,17 +1127,13 @@ int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (!fmsg) return -ENOMEM; - err = devlink_fmsg_obj_nest_start(fmsg); - if (err) - goto out; + devlink_fmsg_obj_nest_start(fmsg); err = reporter->ops->diagnose(reporter, fmsg, info->extack); if (err) goto out; - err = devlink_fmsg_obj_nest_end(fmsg); - if (err) - goto out; + devlink_fmsg_obj_nest_end(fmsg); err = devlink_fmsg_snd(fmsg, info, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); -- cgit v1.2.3 From 374d345d9b5e13380c66d7042f9533a6ac6d1195 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 18 Oct 2023 14:39:20 -0700 Subject: netlink: add variable-length / auto integers We currently push everyone to use padding to align 64b values in netlink. Un-padded nla_put_u64() doesn't even exist any more. The story behind this possibly start with this thread: https://lore.kernel.org/netdev/20121204.130914.1457976839967676240.davem@davemloft.net/ where DaveM was concerned about the alignment of a structure containing 64b stats. If user space tries to access such struct directly: struct some_stats *stats = nla_data(attr); printf("A: %llu", stats->a); lack of alignment may become problematic for some architectures. These days we most often put every single member in a separate attribute, meaning that the code above would use a helper like nla_get_u64(), which can deal with alignment internally. Even for arches which don't have good unaligned access - access aligned to 4B should be pretty efficient. Kernel and well known libraries deal with unaligned input already. Padded 64b is quite space-inefficient (64b + pad means at worst 16B per attr vs 32b which takes 8B). It is also more typing: if (nla_put_u64_pad(rsp, NETDEV_A_SOMETHING_SOMETHING, value, NETDEV_A_SOMETHING_PAD)) Create a new attribute type which will use 32 bits at netlink level if value is small enough (probably most of the time?), and (4B-aligned) 64 bits otherwise. Kernel API is just: if (nla_put_uint(rsp, NETDEV_A_SOMETHING_SOMETHING, value)) Calling this new type "just" sint / uint with no specific size will hopefully also make people more comfortable with using it. Currently telling people "don't use u8, you may need the bits, and netlink will round up to 4B, anyway" is the #1 comment we give to newcomers. 
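As an illustration only (not part of this patch), here is a minimal sketch of how a hypothetical
genetlink family could fill and parse such an attribute with the new helpers; the attribute name,
policy and functions below are made up for this sketch:

	#include <net/netlink.h>

	/* hypothetical attribute, only for this example */
	enum {
		MY_EXAMPLE_ATTR_UNSPEC,
		MY_EXAMPLE_ATTR_VALUE,		/* NLA_UINT */
		__MY_EXAMPLE_ATTR_MAX,
	};

	static const struct nla_policy my_example_policy[__MY_EXAMPLE_ATTR_MAX] = {
		[MY_EXAMPLE_ATTR_VALUE] = { .type = NLA_UINT },
	};

	static int my_example_fill(struct sk_buff *rsp, u64 value)
	{
		/* emits a 4-byte payload when value fits in a u32, 8 bytes otherwise */
		if (nla_put_uint(rsp, MY_EXAMPLE_ATTR_VALUE, value))
			return -EMSGSIZE;
		return 0;
	}

	static u64 my_example_parse(const struct nlattr *attr)
	{
		/* handles both the 32-bit and the 64-bit encoding transparently */
		return nla_get_uint(attr);
	}

The receive side does not need to know which encoding the sender picked: nla_get_uint() checks the
attribute length and reads 32 or 64 bits accordingly.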
In terms of netlink layout it looks like this: 0 4 8 12 16 32b: [nlattr][ u32 ] 64b: [ pad ][nlattr][ u64 ] uint(32) [nlattr][ u32 ] uint(64) [nlattr][ u64 ] Signed-off-by: Jakub Kicinski Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- Documentation/userspace-api/netlink/specs.rst | 18 ++++++- include/net/netlink.h | 69 ++++++++++++++++++++++++++- include/uapi/linux/netlink.h | 5 ++ lib/nlattr.c | 22 +++++++++ net/netlink/policy.c | 14 ++++-- 5 files changed, 121 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 40dd7442d2c3..c1b951649113 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -403,10 +403,21 @@ This section describes the attribute types supported by the ``genetlink`` compatibility level. Refer to documentation of different levels for additional attribute types. -Scalar integer types +Common integer types -------------------- -Fixed-width integer types: +``sint`` and ``uint`` represent signed and unsigned 64 bit integers. +If the value can fit on 32 bits only 32 bits are carried in netlink +messages, otherwise full 64 bits are carried. Note that the payload +is only aligned to 4B, so the full 64 bit value may be unaligned! + +Common integer types should be preferred over fix-width types in majority +of cases. + +Fix-width integer types +----------------------- + +Fixed-width integer types include: ``u8``, ``u16``, ``u32``, ``u64``, ``s8``, ``s16``, ``s32``, ``s64``. Note that types smaller than 32 bit should be avoided as using them @@ -416,6 +427,9 @@ See :ref:`pad_type` for padding of 64 bit attributes. The payload of the attribute is the integer in host order unless ``byte-order`` specifies otherwise. +64 bit values are usually aligned by the kernel but it is recommended +that the user space is able to deal with unaligned values. + .. _pad_type: pad diff --git a/include/net/netlink.h b/include/net/netlink.h index 8a7cd1170e1f..aba2b162a226 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -128,6 +128,8 @@ * nla_len(nla) length of attribute payload * * Attribute Payload Access for Basic Types: + * nla_get_uint(nla) get payload for a uint attribute + * nla_get_sint(nla) get payload for a sint attribute * nla_get_u8(nla) get payload for a u8 attribute * nla_get_u16(nla) get payload for a u16 attribute * nla_get_u32(nla) get payload for a u32 attribute @@ -183,6 +185,8 @@ enum { NLA_REJECT, NLA_BE16, NLA_BE32, + NLA_SINT, + NLA_UINT, __NLA_TYPE_MAX, }; @@ -229,6 +233,7 @@ enum nla_policy_validation { * nested header (or empty); len field is used if * nested_policy is also used, for the max attr * number in the nested policy. + * NLA_SINT, NLA_UINT, * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, @@ -260,12 +265,14 @@ enum nla_policy_validation { * while an array has the nested attributes at another * level down and the attribute types directly in the * nesting don't matter. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64, * NLA_BE16, * NLA_BE32, + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -280,6 +287,7 @@ enum nla_policy_validation { * or NLA_POLICY_FULL_RANGE_SIGNED() macros instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, @@ -288,6 +296,7 @@ enum nla_policy_validation { * to a struct netlink_range_validation that indicates * the min/max values. 
* Use NLA_POLICY_FULL_RANGE(). + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -377,9 +386,11 @@ struct nla_policy { #define __NLA_IS_UINT_TYPE(tp) \ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \ - tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32) + tp == NLA_U64 || tp == NLA_UINT || \ + tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_IS_SINT_TYPE(tp) \ - (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) + (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \ + tp == NLA_SINT) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -1357,6 +1368,22 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) return nla_put(skb, attrtype, sizeof(u32), &tmp); } +/** + * nla_put_uint - Add a variable-size unsigned int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value) +{ + u64 tmp64 = value; + u32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_u32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(u64), &tmp64); +} + /** * nla_put_be32 - Add a __be32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -1511,6 +1538,22 @@ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } +/** + * nla_put_sint - Add a variable-size signed int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value) +{ + s64 tmp64 = value; + s32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_s32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(s64), &tmp64); +} + /** * nla_put_string - Add a string netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -1667,6 +1710,17 @@ static inline u64 nla_get_u64(const struct nlattr *nla) return tmp; } +/** + * nla_get_uint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline u64 nla_get_uint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(u32)) + return nla_get_u32(nla); + return nla_get_u64(nla); +} + /** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute @@ -1729,6 +1783,17 @@ static inline s64 nla_get_s64(const struct nlattr *nla) return tmp; } +/** + * nla_get_sint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline s64 nla_get_sint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(s32)) + return nla_get_s32(nla); + return nla_get_s64(nla); +} + /** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e2ae82e3f9f7..f87aaf28a649 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -298,6 +298,8 @@ struct nla_bitfield32 { * entry has attributes again, the policy for those inner ones * and the corresponding maxtype may be specified. 
* @NL_ATTR_TYPE_BITFIELD32: &struct nla_bitfield32 attribute + * @NL_ATTR_TYPE_SINT: 32-bit or 64-bit signed attribute, aligned to 4B + * @NL_ATTR_TYPE_UINT: 32-bit or 64-bit unsigned attribute, aligned to 4B */ enum netlink_attribute_type { NL_ATTR_TYPE_INVALID, @@ -322,6 +324,9 @@ enum netlink_attribute_type { NL_ATTR_TYPE_NESTED_ARRAY, NL_ATTR_TYPE_BITFIELD32, + + NL_ATTR_TYPE_SINT, + NL_ATTR_TYPE_UINT, }; /** diff --git a/lib/nlattr.c b/lib/nlattr.c index 7a2b6c38fd59..dc15e7888fc1 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -134,6 +134,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U32_MAX; break; case NLA_U64: + case NLA_UINT: case NLA_MSECS: range->max = U64_MAX; break; @@ -183,6 +184,9 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, case NLA_U64: value = nla_get_u64(nla); break; + case NLA_UINT: + value = nla_get_uint(nla); + break; case NLA_MSECS: value = nla_get_u64(nla); break; @@ -248,6 +252,7 @@ void nla_get_range_signed(const struct nla_policy *pt, range->max = S32_MAX; break; case NLA_S64: + case NLA_SINT: range->min = S64_MIN; range->max = S64_MAX; break; @@ -295,6 +300,9 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt, case NLA_S64: value = nla_get_s64(nla); break; + case NLA_SINT: + value = nla_get_sint(nla); + break; default: return -EINVAL; } @@ -320,6 +328,7 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U16: case NLA_U32: case NLA_U64: + case NLA_UINT: case NLA_MSECS: case NLA_BINARY: case NLA_BE16: @@ -329,6 +338,7 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_S16: case NLA_S32: case NLA_S64: + case NLA_SINT: return nla_validate_int_range_signed(pt, nla, extack); default: WARN_ON(1); @@ -355,6 +365,9 @@ static int nla_validate_mask(const struct nla_policy *pt, case NLA_U64: value = nla_get_u64(nla); break; + case NLA_UINT: + value = nla_get_uint(nla); + break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; @@ -433,6 +446,15 @@ static int validate_nla(const struct nlattr *nla, int maxtype, goto out_err; break; + case NLA_SINT: + case NLA_UINT: + if (attrlen != sizeof(u32) && attrlen != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "invalid attribute length"); + return -EINVAL; + } + break; + case NLA_BITFIELD32: if (attrlen != sizeof(struct nla_bitfield32)) goto out_err; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index e2f111edf66c..1f8909c16f14 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -230,6 +230,8 @@ int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt) case NLA_S16: case NLA_S32: case NLA_S64: + case NLA_SINT: + case NLA_UINT: /* maximum is common, u64 min/max with padding */ return common + 2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64))); @@ -288,6 +290,7 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, case NLA_U16: case NLA_U32: case NLA_U64: + case NLA_UINT: case NLA_MSECS: { struct netlink_range_validation range; @@ -297,8 +300,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, type = NL_ATTR_TYPE_U16; else if (pt->type == NLA_U32) type = NL_ATTR_TYPE_U32; - else + else if (pt->type == NLA_U64) type = NL_ATTR_TYPE_U64; + else + type = NL_ATTR_TYPE_UINT; if (pt->validation_type == NLA_VALIDATE_MASK) { if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK, @@ -320,7 +325,8 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, case NLA_S8: case NLA_S16: case NLA_S32: - 
case NLA_S64: { + case NLA_S64: + case NLA_SINT: { struct netlink_range_validation_signed range; if (pt->type == NLA_S8) @@ -329,8 +335,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, type = NL_ATTR_TYPE_S16; else if (pt->type == NLA_S32) type = NL_ATTR_TYPE_S32; - else + else if (pt->type == NLA_S64) type = NL_ATTR_TYPE_S64; + else + type = NL_ATTR_TYPE_SINT; nla_get_range_signed(pt, &range); -- cgit v1.2.3 From b4a11b2033b7d3dfdd46592f7036a775b18cecd1 Mon Sep 17 00:00:00 2001 From: Heng Guo Date: Thu, 19 Oct 2023 09:20:53 +0800 Subject: net: fix IPSTATS_MIB_OUTPKGS increment in OutForwDatagrams. Reproduce environment: network with 3 VM linuxs is connected as below: VM1<---->VM2(latest kernel 6.5.0-rc7)<---->VM3 VM1: eth0 ip: 192.168.122.207 MTU 1500 VM2: eth0 ip: 192.168.122.208, eth1 ip: 192.168.123.224 MTU 1500 VM3: eth0 ip: 192.168.123.240 MTU 1500 Reproduce: VM1 send 1400 bytes UDP data to VM3 using tools scapy with flags=0. scapy command: send(IP(dst="192.168.123.240",flags=0)/UDP()/str('0'*1400),count=1, inter=1.000000) Result: Before IP data is sent. ---------------------------------------------------------------------- root@qemux86-64:~# cat /proc/net/snmp Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates Ip: 1 64 11 0 3 4 0 0 4 7 0 0 0 0 0 0 0 0 0 ...... ---------------------------------------------------------------------- After IP data is sent. ---------------------------------------------------------------------- root@qemux86-64:~# cat /proc/net/snmp Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates Ip: 1 64 12 0 3 5 0 0 4 8 0 0 0 0 0 0 0 0 0 ...... ---------------------------------------------------------------------- "ForwDatagrams" increase from 4 to 5 and "OutRequests" also increase from 7 to 8. Issue description and patch: IPSTATS_MIB_OUTPKTS("OutRequests") is counted with IPSTATS_MIB_OUTOCTETS ("OutOctets") in ip_finish_output2(). According to RFC 4293, it is "OutOctets" counted with "OutTransmits" but not "OutRequests". "OutRequests" does not include any datagrams counted in "ForwDatagrams". ipSystemStatsOutOctets OBJECT-TYPE DESCRIPTION "The total number of octets in IP datagrams delivered to the lower layers for transmission. Octets from datagrams counted in ipIfStatsOutTransmits MUST be counted here. ipSystemStatsOutRequests OBJECT-TYPE DESCRIPTION "The total number of IP datagrams that local IP user- protocols (including ICMP) supplied to IP in requests for transmission. Note that this counter does not include any datagrams counted in ipSystemStatsOutForwDatagrams. So do patch to define IPSTATS_MIB_OUTPKTS to "OutTransmits" and add IPSTATS_MIB_OUTREQUESTS for "OutRequests". Add IPSTATS_MIB_OUTREQUESTS counter in __ip_local_out() for ipv4 and add IPSTATS_MIB_OUT counter in ip6_finish_output2() for ipv6. Test result with patch: Before IP data is sent. 
---------------------------------------------------------------------- root@qemux86-64:~# cat /proc/net/snmp Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates OutTransmits Ip: 1 64 9 0 5 1 0 0 3 3 0 0 0 0 0 0 0 0 0 4 ...... root@qemux86-64:~# cat /proc/net/netstat ...... IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts InOctets OutOctets InMcastOctets OutMcastOctets InBcastOctets OutBcastOctets InCsumErrors InNoECTPkts InECT1Pkts InECT0Pkts InCEPkts ReasmOverlaps IpExt: 0 0 0 0 0 0 2976 1896 0 0 0 0 0 9 0 0 0 0 ---------------------------------------------------------------------- After IP data is sent. ---------------------------------------------------------------------- root@qemux86-64:~# cat /proc/net/snmp Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates OutTransmits Ip: 1 64 10 0 5 2 0 0 3 3 0 0 0 0 0 0 0 0 0 5 ...... root@qemux86-64:~# cat /proc/net/netstat ...... IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts InOctets OutOctets InMcastOctets OutMcastOctets InBcastOctets OutBcastOctets InCsumErrors InNoECTPkts InECT1Pkts InECT0Pkts InCEPkts ReasmOverlaps IpExt: 0 0 0 0 0 0 4404 3324 0 0 0 0 0 10 0 0 0 0 ---------------------------------------------------------------------- "ForwDatagrams" increase from 1 to 2 and "OutRequests" is keeping 3. "OutTransmits" increase from 4 to 5 and "OutOctets" increase 1428. Signed-off-by: Heng Guo Reviewed-by: Kun Song Reviewed-by: Filip Pudak Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/snmp.h | 3 ++- net/ipv4/ip_output.c | 2 ++ net/ipv4/proc.c | 3 ++- net/ipv6/ip6_output.c | 6 ++++-- net/ipv6/mcast.c | 5 ++--- net/ipv6/ndisc.c | 2 +- net/ipv6/proc.c | 3 ++- net/ipv6/raw.c | 2 +- 8 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 26f33a4c253d..b2b72886cb6d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -24,7 +24,7 @@ enum IPSTATS_MIB_INOCTETS, /* InOctets */ IPSTATS_MIB_INDELIVERS, /* InDelivers */ IPSTATS_MIB_OUTFORWDATAGRAMS, /* OutForwDatagrams */ - IPSTATS_MIB_OUTPKTS, /* OutRequests */ + IPSTATS_MIB_OUTREQUESTS, /* OutRequests */ IPSTATS_MIB_OUTOCTETS, /* OutOctets */ /* other fields */ IPSTATS_MIB_INHDRERRORS, /* InHdrErrors */ @@ -57,6 +57,7 @@ enum IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */ IPSTATS_MIB_CEPKTS, /* InCEPkts */ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */ + IPSTATS_MIB_OUTPKTS, /* OutTransmits */ __IPSTATS_MIB_MAX }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 89e62ed08dad..b06f678b03a1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,6 +101,8 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS); + iph_set_totlen(iph, skb->len); ip_send_check(iph); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index eaf1d3113b62..a85b0aba3646 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -83,7 +83,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS), - SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS), + SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -93,6 +93,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("FragOKs", IPSTATS_MIB_FRAGOKS), SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS), SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES), + SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a471c7e91761..571c10fb00b1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -328,7 +330,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -1987,7 +1989,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->tstamp = cork->base.transmit_time; ip6_cork_steal_dst(skb, cork); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 99e28b444a4c..b75d3c9d41bb 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1789,7 +1789,7 @@ static void mld_sendpack(struct sk_buff *skb) rcu_read_lock(); idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - sizeof(*pip6); @@ -2147,8 +2147,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) full_len = sizeof(struct ipv6hdr) + payload_len; rcu_read_lock(); - IP6_UPD_PO_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUT, full_len); + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 679443d7ecb5..a19999b30bc0 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -504,7 +504,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, rcu_read_lock(); idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index e20b3705c2d2..6d1d9221649d 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -61,7 +61,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), - SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS), + SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -84,6 +84,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS), + SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a2aa54a2baae..dd0a4e73e602 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -651,7 +651,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, * have been queued for deletion. */ rcu_read_lock(); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) -- cgit v1.2.3 From 20c6e05bd33deaa6fa890252d7ffc5ad54a0942c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Oct 2023 08:28:15 -0700 Subject: ethtool: untangle the linkmode and ethtool headers Commit 26c5334d344d ("ethtool: Add forced speed to supported link modes maps") added a dependency between ethtool.h and linkmode.h. The dependency in the opposite direction already exists so the new code was inserted in an awkward place. The reason for ethtool.h to include linkmode.h, is that ethtool_forced_speed_maps_init() is a static inline helper. That's not really necessary. Signed-off-by: Jakub Kicinski Reviewed-by: Paul Greenwalt Reviewed-by: Russell King (Oracle) Reviewed-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 22 ++-------------------- include/linux/linkmode.h | 29 ++++++++++++++--------------- net/ethtool/common.c | 21 +++++++++++++++++++++ 3 files changed, 37 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8e91e8b8a693..226a36ed5aa1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -13,7 +13,6 @@ #ifndef _LINUX_ETHTOOL_H #define _LINUX_ETHTOOL_H -#include #include #include #include @@ -1070,23 +1069,6 @@ struct ethtool_forced_speed_map { .arr_size = ARRAY_SIZE(prefix##_##value), \ } -/** - * ethtool_forced_speed_maps_init - * @maps: Pointer to an array of Ethtool forced speed map - * @size: Array size - * - * Initialize an array of Ethtool forced speed map to Ethtool link modes. This - * should be called during driver module init. - */ -static inline void -ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) -{ - for (u32 i = 0; i < size; i++) { - struct ethtool_forced_speed_map *map = &maps[i]; - - linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps); - map->cap_arr = NULL; - map->arr_size = 0; - } -} +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index cd38f89553e6..7303b4bc2ce0 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -2,21 +2,6 @@ #define __LINKMODE_H #include - -static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) -{ - __set_bit(nr, addr); -} - -static inline void linkmode_set_bit_array(const int *array, int array_size, - unsigned long *addr) -{ - int i; - - for (i = 0; i < array_size; i++) - linkmode_set_bit(array[i], addr); -} - #include #include @@ -53,6 +38,11 @@ static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) +{ + __set_bit(nr, addr); +} + static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); @@ -72,6 +62,15 @@ static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) return test_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline int linkmode_equal(const unsigned long *src1, const unsigned long *src2) { diff --git a/net/ethtool/common.c b/net/ethtool/common.c index f5598c5f50de..b4419fb6df6a 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -685,3 +685,24 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, link_ksettings->base.duplex = link_info->duplex; } EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode); + +/** + * ethtool_forced_speed_maps_init + * @maps: Pointer to an array of Ethtool forced speed map + * @size: Array size + * + * Initialize an array of Ethtool forced speed map to Ethtool link modes. This + * should be called during driver module init. 
+ */ +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) +{ + for (u32 i = 0; i < size; i++) { + struct ethtool_forced_speed_map *map = &maps[i]; + + linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps); + map->cap_arr = NULL; + map->arr_size = 0; + } +} +EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); -- cgit v1.2.3 From 92fc97ae9cfd1e8c13d973ac92d224a185056840 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Fri, 20 Oct 2023 12:21:16 +0000 Subject: net: atm: Remove redundant check. Checking the 'adev' variable is unnecessary, because 'cdev' has already been checked earlier. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 656d98b09d57 ("[ATM]: basic sysfs support for ATM devices") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/atm/atm_sysfs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 466353b3dde4..54e7fb1a4ee5 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -116,8 +116,6 @@ static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env) return -ENODEV; adev = to_atm_dev(cdev); - if (!adev) - return -ENODEV; if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number)) return -ENOMEM; -- cgit v1.2.3 From fc47e86dbfb75a864c0c9dd8e78affb6506296bb Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 20 Oct 2023 13:55:25 +0200 Subject: ipv6: rename and move ip6_dst_lookup_tunnel() At the moment ip6_dst_lookup_tunnel() is used only by bareudp. Ideally, other UDP tunnel implementations should use it, but to do so the function needs to accept new parameters that are specific for UDP tunnels, such as the ports. Prepare for these changes by renaming the function to udp_tunnel6_dst_lookup() and move it to file net/ipv6/ip6_udp_tunnel.c. This is similar to what already done for IPv4 in commit bf3fcbf7e7a0 ("ipv4: rename and move ip_route_output_tunnel()"). Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/bareudp.c | 10 +++---- include/net/ipv6.h | 6 ----- include/net/udp_tunnel.h | 7 +++++ net/ipv6/ip6_output.c | 68 ---------------------------------------------- net/ipv6/ip6_udp_tunnel.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 47a9c2a5583c..9a0a1a9f6cfe 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -371,8 +371,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!sock) return -ESHUTDOWN; - dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, &saddr, info, - IPPROTO_UDP, use_cache); + dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, &saddr, info, + IPPROTO_UDP, use_cache); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -498,9 +498,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct in6_addr saddr; struct socket *sock = rcu_dereference(bareudp->sock); - dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, - &saddr, info, IPPROTO_UDP, - use_cache); + dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, + &saddr, info, IPPROTO_UDP, + use_cache); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b3444c8a6f74..78d38dd88aba 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1133,12 +1133,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 4d0578fab01a..1dac296d8449 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -169,6 +169,13 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, struct dst_cache *dst_cache); +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 571c10fb00b1..3c7de89d6755 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1283,74 +1283,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -/** - * ip6_dst_lookup_tunnel - perform route lookup on tunnel - * @skb: Packet for which lookup is done - * @dev: Tunnel device - * @net: Network namespace of tunnel device - * @sock: Socket which provides route info - * @saddr: Memory to store the src ip address - * @info: Tunnel information - * @protocol: IP protocol - * @use_cache: Flag to enable cache usage - * This function performs a route lookup on a tunnel - * - * It returns a valid dst pointer and stores src address to be used in - * tunnel in param saddr on success, else a pointer encoded error code. 
- */ - -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, - struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, - bool use_cache) -{ - struct dst_entry *dst = NULL; -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct flowi6 fl6; - __u8 prio; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - dst = dst_cache_get_ip6(dst_cache, saddr); - if (dst) - return dst; - } -#endif - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = protocol; - fl6.daddr = info->key.u.ipv6.dst; - fl6.saddr = info->key.u.ipv6.src; - prio = info->key.tos; - fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); - - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); - if (IS_ERR(dst)) { - netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (dst->dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); - dst_release(dst); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); -#endif - *saddr = fl6.saddr; - return dst; -} -EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 70d38705c92f..fc122abf6b75 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -1,3 +1,4 @@ + // SPDX-License-Identifier: GPL-2.0-only #include #include @@ -112,4 +113,72 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); +/** + * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel + * @skb: Packet for which lookup is done + * @dev: Tunnel device + * @net: Network namespace of tunnel device + * @sock: Socket which provides route info + * @saddr: Memory to store the src ip address + * @info: Tunnel information + * @protocol: IP protocol + * @use_cache: Flag to enable cache usage + * This function performs a route lookup on a UDP tunnel + * + * It returns a valid dst pointer and stores src address to be used in + * tunnel in param saddr on success, else a pointer encoded error code. + */ + +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, + bool use_cache) +{ + struct dst_entry *dst = NULL; +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct flowi6 fl6; + __u8 prio; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, saddr); + if (dst) + return dst; + } +#endif + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = protocol; + fl6.daddr = info->key.u.ipv6.dst; + fl6.saddr = info->key.u.ipv6.src; + prio = info->key.tos; + fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, + NULL); + if (IS_ERR(dst)) { + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (dst->dev == dev) { /* is this necessary? 
*/ + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); + dst_release(dst); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); +#endif + *saddr = fl6.saddr; + return dst; +} +EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup); + MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7e937dcf96d0489b3cdd1cff9dfd049617d28492 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 20 Oct 2023 13:55:26 +0200 Subject: ipv6: remove "proto" argument from udp_tunnel6_dst_lookup() The function is now UDP-specific, the protocol is always IPPROTO_UDP. This is similar to what already done for IPv4 in commit 78f3655adcb5 ("ipv4: remove "proto" argument from udp_tunnel_dst_lookup()"). Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 5 ++--- include/net/udp_tunnel.h | 2 +- net/ipv6/ip6_udp_tunnel.c | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 9a0a1a9f6cfe..9eb5e11c09b4 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -372,7 +372,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -ESHUTDOWN; dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, &saddr, info, - IPPROTO_UDP, use_cache); + use_cache); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -499,8 +499,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct socket *sock = rcu_dereference(bareudp->sock); dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, - &saddr, info, IPPROTO_UDP, - use_cache); + &saddr, info, use_cache); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1dac296d8449..583867643bd1 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -175,7 +175,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct socket *sock, struct in6_addr *saddr, const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); + bool use_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index fc122abf6b75..b9c906518ce2 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -121,7 +121,6 @@ EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); * @sock: Socket which provides route info * @saddr: Memory to store the src ip address * @info: Tunnel information - * @protocol: IP protocol * @use_cache: Flag to enable cache usage * This function performs a route lookup on a UDP tunnel * @@ -135,7 +134,6 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct socket *sock, struct in6_addr *saddr, const struct ip_tunnel_info *info, - u8 protocol, bool use_cache) { struct dst_entry *dst = NULL; @@ -155,7 +153,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, #endif memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = protocol; + fl6.flowi6_proto = IPPROTO_UDP; fl6.daddr = info->key.u.ipv6.dst; fl6.saddr = info->key.u.ipv6.src; prio = info->key.tos; -- cgit v1.2.3 From 946fcfdbc5b97e26d31339ebca2d9a51a4f975ff Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 20 Oct 2023 13:55:27 +0200 Subject: ipv6: add new arguments to udp_tunnel6_dst_lookup() We want to make the function more generic so that it can be used by other UDP tunnel implementations 
such as geneve and vxlan. To do that, add the following arguments: - source and destination UDP port; - ifindex of the output interface, needed by vxlan; - the tos, because in some cases it is not taken from struct ip_tunnel_info (for example, when it's inherited from the inner packet); - the dst cache, because not all tunnel types (e.g. vxlan) want to use the one from struct ip_tunnel_info. With these parameters, the function no longer needs the full struct ip_tunnel_info as argument and we can pass only the relevant part of it (struct ip_tunnel_key). This is similar to what already done for IPv4 in commit 72fc68c6356b ("ipv4: add new arguments to udp_tunnel_dst_lookup()"). Suggested-by: Guillaume Nault Signed-off-by: Beniamino Galvani Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 10 +++++++--- include/net/udp_tunnel.h | 7 ++++--- net/ipv6/ip6_udp_tunnel.c | 33 ++++++++++++++++++--------------- 3 files changed, 29 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 9eb5e11c09b4..9c11a0d0273b 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -371,8 +371,10 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!sock) return -ESHUTDOWN; - dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, &saddr, info, - use_cache); + dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, + key, 0, 0, key->tos, + use_cache ? + (struct dst_cache *) &info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -499,7 +501,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct socket *sock = rcu_dereference(bareudp->sock); dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, - &saddr, info, use_cache); + 0, &saddr, &info->key, + 0, 0, info->key.tos, + use_cache ? 
&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 583867643bd1..d716214fe03d 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -172,10 +172,11 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, - struct socket *sock, + struct socket *sock, int oif, struct in6_addr *saddr, - const struct ip_tunnel_info *info, - bool use_cache); + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b9c906518ce2..a7bf0327b380 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -119,9 +119,13 @@ EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); * @dev: Tunnel device * @net: Network namespace of tunnel device * @sock: Socket which provides route info + * @oif: Index of the output interface * @saddr: Memory to store the src ip address - * @info: Tunnel information - * @use_cache: Flag to enable cache usage + * @key: Tunnel information + * @sport: UDP source port + * @dport: UDP destination port + * @dsfield: The traffic class field + * @dst_cache: The dst cache to use for lookup * This function performs a route lookup on a UDP tunnel * * It returns a valid dst pointer and stores src address to be used in @@ -132,20 +136,17 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, struct socket *sock, + int oif, struct in6_addr *saddr, - const struct ip_tunnel_info *info, - bool use_cache) + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache) { struct dst_entry *dst = NULL; -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif struct flowi6 fl6; - __u8 prio; #ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { + if (dst_cache) { dst = dst_cache_get_ip6(dst_cache, saddr); if (dst) return dst; @@ -154,10 +155,12 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; - fl6.daddr = info->key.u.ipv6.dst; - fl6.saddr = info->key.u.ipv6.src; - prio = info->key.tos; - fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); + fl6.flowi6_oif = oif; + fl6.daddr = key->u.ipv6.dst; + fl6.saddr = key->u.ipv6.src; + fl6.fl6_sport = sport; + fl6.fl6_dport = dport; + fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, NULL); @@ -171,7 +174,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, return ERR_PTR(-ELOOP); } #ifdef CONFIG_DST_CACHE - if (use_cache) + if (dst_cache) dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); #endif *saddr = fl6.saddr; -- cgit v1.2.3 From 73ed8e03388d16c12fc577e5c700b58a29045a15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:37 +0000 Subject: tcp: fix cookie_init_timestamp() overflows cookie_init_timestamp() is supposed to return a 64bit timestamp suitable for both TSval determination and setting of skb->tstamp. Unfortunately it uses 32bit fields and overflows after 2^32 * 10^6 nsec (~49 days) of uptime. 
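(2^32 * 10^6 nsec is 2^32 msec, about 4.29e6 seconds, i.e. roughly 49.7 days.)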
Generated TSval are still correct, but skb->tstamp might be set far away in the past, potentially confusing other layers. tcp_ns_to_ts() is changed to return a full 64bit value, ts and ts_now variables are changed to u64 type, and TSMASK is removed in favor of shifts operations. While we are at it, change this sequence: ts >>= TSBITS; ts--; ts <<= TSBITS; ts |= options; to: ts -= (1UL << TSBITS); Fixes: 9a568de4818d ("tcp: switch TCP TS option (RFC 7323) to 1ms clock") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/syncookies.c | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index bad304d173a5..d47a57a47b50 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline u32 tcp_time_stamp(const struct tcp_sock *tp) } /* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ -static inline u32 tcp_ns_to_ts(u64 ns) +static inline u64 tcp_ns_to_ts(u64 ns) { return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index dc478a0574cb..3b4dafefb4b0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -41,7 +41,6 @@ static siphash_aligned_key_t syncookie_secret[2]; * requested/supported by the syn/synack exchange. */ #define TSBITS 6 -#define TSMASK (((__u32)1 << TSBITS) - 1) static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u32 count, int c) @@ -62,27 +61,22 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, */ u64 cookie_init_timestamp(struct request_sock *req, u64 now) { - struct inet_request_sock *ireq; - u32 ts, ts_now = tcp_ns_to_ts(now); + const struct inet_request_sock *ireq = inet_rsk(req); + u64 ts, ts_now = tcp_ns_to_ts(now); u32 options = 0; - ireq = inet_rsk(req); - options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; if (ireq->sack_ok) options |= TS_OPT_SACK; if (ireq->ecn_ok) options |= TS_OPT_ECN; - ts = ts_now & ~TSMASK; + ts = (ts_now >> TSBITS) << TSBITS; ts |= options; - if (ts > ts_now) { - ts >>= TSBITS; - ts--; - ts <<= TSBITS; - ts |= options; - } - return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ); + if (ts > ts_now) + ts -= (1UL << TSBITS); + + return ts * (NSEC_PER_SEC / TCP_TS_HZ); } -- cgit v1.2.3 From 99d679556d737a14391c68e562d94076c2983252 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:38 +0000 Subject: tcp: add tcp_time_stamp_ms() helper In preparation of adding usec TCP TS values, add tcp_time_stamp_ms() for contexts needing ms based values. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_timer.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d47a57a47b50..9fc6dc4ba9e2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -804,6 +804,11 @@ static inline u32 tcp_time_stamp(const struct tcp_sock *tp) return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); } +static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) +{ + return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); +} + /* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ static inline u64 tcp_ns_to_ts(u64 ns) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ab87f0285b72..ffce17545b62 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2856,7 +2856,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) static void tcp_update_rto_time(struct tcp_sock *tp) { if (tp->rto_stamp) { - tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp; + tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp; tp->rto_stamp = 0; } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0862b73dd3b5..63247c78dc13 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -422,7 +422,7 @@ static void tcp_update_rto_stats(struct sock *sk) if (!icsk->icsk_retransmits) { tp->total_rto_recoveries++; - tp->rto_stamp = tcp_time_stamp(tp); + tp->rto_stamp = tcp_time_stamp_ms(tp); } icsk->icsk_retransmits++; tp->total_rto++; -- cgit v1.2.3 From 2a7c8d291ffeba69a47d8528987156f625cc05b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:39 +0000 Subject: tcp: introduce tcp_clock_ms() It delivers current TCP time stamp in ms unit, and is used in place of confusing tcp_time_stamp_raw() It is the same family than tcp_clock_ns() and tcp_clock_ms(). tcp_time_stamp_raw() will be replaced later for TSval contexts with a more descriptive name. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp.c | 6 ++---- net/ipv4/tcp_minisocks.c | 4 ++-- net/netfilter/nf_synproxy_core.c | 2 +- tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 4 ++-- 5 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9fc6dc4ba9e2..3bdf1141f5a2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -798,6 +798,11 @@ static inline u64 tcp_clock_us(void) return div_u64(tcp_clock_ns(), NSEC_PER_USEC); } +static inline u64 tcp_clock_ms(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); +} + /* This should only be used in contexts where tp->tcp_mstamp is up to date */ static inline u32 tcp_time_stamp(const struct tcp_sock *tp) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 56a8d936000f..5b034b0356ec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3817,10 +3817,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_rto = tp->total_rto; info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; info->tcpi_total_rto_time = tp->total_rto_time; - if (tp->rto_stamp) { - info->tcpi_total_rto_time += tcp_time_stamp_raw() - - tp->rto_stamp; - } + if (tp->rto_stamp) + info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp; unlock_sock_fast(sk, slow); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3f87611077ef..a9fdba897a28 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -567,8 +567,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, USEC_PER_SEC / TCP_TS_HZ); newtp->total_rto = req->num_timeout; newtp->total_rto_recoveries = 1; - newtp->total_rto_time = tcp_time_stamp_raw() - - newtp->retrans_stamp; + newtp->total_rto_time = tcp_clock_ms() - + newtp->retrans_stamp; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 16915f8eef2b..467671f2d42f 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -153,7 +153,7 @@ void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts) { opts->tsecr = opts->tsval; - opts->tsval = tcp_time_stamp_raw() & ~0x3f; + opts->tsval = tcp_clock_ms() & ~0x3f; if (opts->options & NF_SYNPROXY_OPT_WSCALE) { opts->tsval |= opts->wscale; diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 07d786329105..e959336c7a73 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -177,7 +177,7 @@ static __always_inline __u32 tcp_ns_to_ts(__u64 ns) return ns / (NSEC_PER_SEC / TCP_TS_HZ); } -static __always_inline __u32 tcp_time_stamp_raw(void) +static __always_inline __u32 tcp_clock_ms(void) { return tcp_ns_to_ts(tcp_clock_ns()); } @@ -274,7 +274,7 @@ static __always_inline bool tscookie_init(struct tcphdr *tcp_header, if (!loop_ctx.option_timestamp) return false; - cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie = tcp_clock_ms() & ~TSMASK; cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; if (loop_ctx.option_sack) cookie |= TS_OPT_SACK; -- cgit v1.2.3 From 16cf6477741bdaa287d5e4531a1a503618a41a22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:40 +0000 Subject: tcp: replace tcp_time_stamp_raw() In preparation of usec TCP TS support, remove tcp_time_stamp_raw() in favor of tcp_clock_ts() helper. 
This helper will return a suitable 32bit result to feed TS values, depending on a socket field. Also add tcp_tw_tsval() and tcp_rsk_tsval() helpers to factorize the details. We do not yet support usec timestamps. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 25 +++++++++++++++++++------ net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 4 files changed, 25 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3bdf1141f5a2..0534526a535d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -803,6 +803,16 @@ static inline u64 tcp_clock_ms(void) return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); } +/* TCP Timestamp included in TS option (RFC 1323) can either use ms + * or usec resolution. Each socket carries a flag to select one or other + * resolution, as the route attribute could change anytime. + * Each flow must stick to initial resolution. + */ +static inline u32 tcp_clock_ts(bool usec_ts) +{ + return usec_ts ? tcp_clock_us() : tcp_clock_ms(); +} + /* This should only be used in contexts where tp->tcp_mstamp is up to date */ static inline u32 tcp_time_stamp(const struct tcp_sock *tp) { @@ -820,12 +830,6 @@ static inline u64 tcp_ns_to_ts(u64 ns) return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); } -/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ -static inline u32 tcp_time_stamp_raw(void) -{ - return tcp_ns_to_ts(tcp_clock_ns()); -} - void tcp_mstamp_refresh(struct tcp_sock *tp); static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) @@ -844,6 +848,15 @@ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } +static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) +{ + return tcp_clock_ts(false) + tcptw->tw_ts_offset; +} + +static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) +{ + return tcp_clock_ts(false) + treq->ts_off; +} #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5b034b0356ec..805f8341064f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3632,7 +3632,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (!tp->repair) err = -EPERM; else - WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw()); + WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(false)); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); @@ -4143,7 +4143,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TIMESTAMP: - val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); + val = tcp_clock_ts(false) + READ_ONCE(tp->tsoffset); break; case TCP_NOTSENT_LOWAT: val = READ_ONCE(tp->notsent_lowat); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a67a5de86253..cdd65cc594bc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -954,7 +954,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, + tcp_tw_tsval(tcptw), tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -988,7 +988,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, + tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), 0, 
tcp_md5_do_lookup(sk, l3index, addr, AF_INET), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d410703bb5a1..1ee6517e9b2f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1096,7 +1096,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, + tcp_tw_tsval(tcptw), tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); @@ -1123,7 +1123,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, + tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), ipv6_get_dsfield(ipv6_hdr(skb)), 0, -- cgit v1.2.3 From d1a02ed66fe62aa2edd77bd54e270ebc33bd12ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:41 +0000 Subject: tcp: rename tcp_skb_timestamp() This helper returns a 32bit TCP TSval from skb->tstamp. As we are going to support usec or ms units soon, rename it to tcp_skb_timestamp_ts() and add a boolean to select the unit. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 14 +++++++++----- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 8 ++++---- net/ipv4/tcp_timer.c | 4 ++-- 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0534526a535d..493f8550055b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -837,17 +837,21 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } -static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) -{ - return tcp_ns_to_ts(skb->skb_mstamp_ns); -} - /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } +/* Provide skb TSval in usec or ms unit */ +static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) +{ + if (usec_ts) + return tcp_skb_timestamp_us(skb); + + return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC); +} + static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) { return tcp_clock_ts(false) + tcptw->tw_ts_offset; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ffce17545b62..de68cad82d19 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2442,7 +2442,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, const struct sk_buff *skb) { return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && - tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(false, skb)); } /* Nothing was retransmitted or returned timestamp is less diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 909f85aefd74..03a2a9fc0dc1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -799,7 +799,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; + opts->tsval = tcp_skb_timestamp_ts(false, skb) + tp->tsoffset; opts->tsecr = 
tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -884,7 +884,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; + opts->tsval = tcp_skb_timestamp_ts(false, skb) + tcp_rsk(req)->ts_off; opts->tsecr = READ_ONCE(req->ts_recent); remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -943,7 +943,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; + opts->tsval = skb ? tcp_skb_timestamp_ts(false, skb) + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -3379,7 +3379,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Save stamp of the first (attempted) retransmit. */ if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp(skb); + tp->retrans_stamp = tcp_skb_timestamp_ts(false, skb); if (tp->undo_retrans < 0) tp->undo_retrans = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 63247c78dc13..8764a9a2dc21 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -479,7 +479,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, return false; rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) - - (tp->retrans_stamp ?: tcp_skb_timestamp(skb))); + (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb))); return rtx_delta > timeout; } @@ -534,7 +534,7 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; - rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb)); + rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb)); if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), -- cgit v1.2.3 From 003e07a1e48e9423647d2fef1c86b4caab3a94be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:42 +0000 Subject: tcp: move tcp_ns_to_ts() to net/ipv4/syncookies.c tcp_ns_to_ts() is only used once from cookie_init_timestamp(). Also add the 'bool usec_ts' parameter to enable usec TS later. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 6 ------ net/ipv4/syncookies.c | 10 +++++++++- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 493f8550055b..b86abf1fbe46 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -824,12 +824,6 @@ static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); } -/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ -static inline u64 tcp_ns_to_ts(u64 ns) -{ - return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); -} - void tcp_mstamp_refresh(struct tcp_sock *tp); static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3b4dafefb4b0..62395fdb0ca5 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -51,6 +51,14 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, count, &syncookie_secret[c]); } +/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ +static u64 tcp_ns_to_ts(bool usec_ts, u64 val) +{ + if (usec_ts) + return div_u64(val, NSEC_PER_USEC); + + return div_u64(val, NSEC_PER_MSEC); +} /* * when syncookies are in effect and tcp timestamps are enabled we encode @@ -62,7 +70,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u64 cookie_init_timestamp(struct request_sock *req, u64 now) { const struct inet_request_sock *ireq = inet_rsk(req); - u64 ts, ts_now = tcp_ns_to_ts(now); + u64 ts, ts_now = tcp_ns_to_ts(false, now); u32 options = 0; options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; -- cgit v1.2.3 From 9d0c00f5ca05be9e89649c156f9d5b9421fc534e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:43 +0000 Subject: tcp: rename tcp_time_stamp() to tcp_time_stamp_ts() This helper returns a TSval from a TCP socket. It currently calls tcp_time_stamp_ms() but will soon be able to return a usec based TSval, depending on an upcoming tp->tcp_usec_ts field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 9 ++++----- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_lp.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 10 +++++----- 5 files changed, 14 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b86abf1fbe46..af72c1dc37f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -813,15 +813,14 @@ static inline u32 tcp_clock_ts(bool usec_ts) return usec_ts ? 
tcp_clock_us() : tcp_clock_ms(); } -/* This should only be used in contexts where tp->tcp_mstamp is up to date */ -static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) { - return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); } -static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) +static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) { - return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); + return tcp_time_stamp_ms(tp); } void tcp_mstamp_refresh(struct tcp_sock *tp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index de68cad82d19..e7e38fc1d62f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -704,7 +704,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, if (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; u32 delta_us; if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { @@ -3148,7 +3148,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) { - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { if (!delta) @@ -6293,7 +6293,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, - tcp_time_stamp(tp))) { + tcp_time_stamp_ts(tp))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index ae36780977d2..52fe17167460 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -272,7 +272,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - u32 now = tcp_time_stamp(tp); + u32 now = tcp_time_stamp_ts(tp); u32 delta; if (sample->rtt_us > 0) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 03a2a9fc0dc1..a1fec8be9ac3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3961,7 +3961,7 @@ int tcp_connect(struct sock *sk) tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tcp_mstamp_refresh(tp); - tp->retrans_stamp = tcp_time_stamp(tp); + tp->retrans_stamp = tcp_time_stamp_ts(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8764a9a2dc21..bfcf3fe44c72 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -33,7 +33,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout) return icsk->icsk_rto; - elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; + elapsed = tcp_time_stamp_ts(tcp_sk(sk)) - start_ts; remaining = user_timeout - elapsed; if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ @@ -226,7 +226,7 @@ static bool retransmits_timed_out(struct sock *sk, timeout = tcp_model_timeout(sk, boundary, rto_base); } - return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; + return (s32)(tcp_time_stamp_ts(tcp_sk(sk)) - start_ts - timeout) >= 0; } /* A write timeout has 
occurred. Process the after effects. */ @@ -462,7 +462,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_time_stamp(tp); + tp->retrans_stamp = tcp_time_stamp_ts(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, req->timeout << req->num_timeout, TCP_RTO_MAX); } @@ -478,7 +478,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, if (rcv_delta <= timeout) return false; - rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) - + rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb))); return rtx_delta > timeout; @@ -534,7 +534,7 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; - rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb)); + rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb)); if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), -- cgit v1.2.3 From b04c3320885a88a94e4bbb2f9dbc4871c9bc336f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:44 +0000 Subject: tcp: add tcp_rtt_tsopt_us() Before adding usec TS support, add tcp_rtt_tsopt_us() helper to factorize code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e7e38fc1d62f..5666f6137167 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -693,6 +693,21 @@ new_measure: tp->rcv_rtt_est.time = tp->tcp_mstamp; } +static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) +{ + u32 delta, delta_us; + + delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; + + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + if (!delta) + delta = 1; + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + return delta_us; + } + return -1; +} + static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { @@ -704,15 +719,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, if (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { - u32 delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; - u32 delta_us; - - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { - if (!delta) - delta = 1; - delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - tcp_rcv_rtt_update(tp, delta_us, 0); - } + s32 delta = tcp_rtt_tsopt_us(tp); + + if (delta >= 0) + tcp_rcv_rtt_update(tp, delta, 0); } } @@ -3146,17 +3156,10 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. 
*/ - if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - flag & FLAG_ACKED) { - u32 delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; - - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { - if (!delta) - delta = 1; - seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - ca_rtt_us = seq_rtt_us; - } - } + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && + tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) + seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp); + rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) return false; -- cgit v1.2.3 From 614e8316aa4cafba3e204cb8ee48bd12b92f3d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:47 +0000 Subject: tcp: add support for usec resolution in TCP TS values Back in 2015, Van Jacobson suggested using usec resolution in TCP TS values. This has been implemented in our private kernels. Goals were: 1) better observability of delays in networking stacks. 2) better disambiguation of events based on TSval/ecr values. 3) building block for congestion control modules needing usec resolution. Back then we implemented a scheme based on private SYN options to negotiate the feature. For upstream submission, we chose to use a route attribute, because this feature is probably going to be used in private networks [1] [2]. ip route add 10/8 ... features tcp_usec_ts Note that RFC 7323 recommends a "timestamp clock frequency in the range 1 ms to 1 sec per tick.", but also mentions "the maximum acceptable clock frequency is one tick every 59 ns." [1] Unfortunately RFC 7323 5.5 (Outdated Timestamps) suggests invalidating TS.Recent values after a flow was idle for more than 24 days. This is the part making usec_ts a problem for peers following this recommendation for long living idle flows. [2] Attempts to standardize usec ts went nowhere: https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf https://datatracker.ietf.org/doc/draft-wang-tcpm-low-latency-opt/ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 +++- include/net/inet_timewait_sock.h | 3 ++- include/net/tcp.h | 6 ++++-- net/ipv4/syncookies.c | 6 +++++- net/ipv4/tcp.c | 18 ++++++++++++++---- net/ipv4/tcp_input.c | 5 ++++- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 19 ++++++++++++++----- net/ipv4/tcp_output.c | 12 ++++++++---- net/ipv4/tcp_timer.c | 40 ++++++++++++++++++++++++++-------------- net/ipv6/tcp_ipv6.c | 1 + 11 files changed, 82 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 04a0e647ef74..6df715b6e51d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -152,6 +152,7 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; + s8 req_usec_ts; #if IS_ENABLED(CONFIG_MPTCP) bool drop_req; #endif @@ -257,7 +258,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + tcp_usec_ts:1, /* TSval values in usec */ + unused:4; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4a8e578405cb..b14999ff55db 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -67,7 +67,8 @@ struct inet_timewait_sock { /* And these are ours.
*/ unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 3, /* 3 bits hole */ + tw_usec_ts : 1, + tw_pad : 2, /* 2 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0ab577869d7a..39b731c900dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -825,6 +825,8 @@ static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) { + if (tp->tcp_usec_ts) + return tp->tcp_mstamp; return tcp_time_stamp_ms(tp); } @@ -852,12 +854,12 @@ static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) { - return tcp_clock_ts(false) + tcptw->tw_ts_offset; + return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset; } static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) { - return tcp_clock_ts(false) + treq->ts_off; + return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off; } #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 62395fdb0ca5..c64334363230 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -84,7 +84,9 @@ u64 cookie_init_timestamp(struct request_sock *req, u64 now) if (ts > ts_now) ts -= (1UL << TSBITS); - return ts * (NSEC_PER_SEC / TCP_TS_HZ); + if (tcp_rsk(req)->req_usec_ts) + return ts * NSEC_PER_USEC; + return ts * NSEC_PER_MSEC; } @@ -304,6 +306,8 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->req_usec_ts = -1; + #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); if (treq->is_mptcp) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 805f8341064f..b961364b4961 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3629,10 +3629,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, tp->fastopen_no_cookie = val; break; case TCP_TIMESTAMP: - if (!tp->repair) + if (!tp->repair) { err = -EPERM; - else - WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(false)); + break; + } + /* val is an opaque field, + * and low order bit contains usec_ts enable bit. + * Its a best effort, and we do not care if user makes an error. 
+ */ + tp->tcp_usec_ts = val & 1; + WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts)); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); @@ -4143,7 +4149,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TIMESTAMP: - val = tcp_clock_ts(false) + READ_ONCE(tp->tsoffset); + val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset); + if (tp->tcp_usec_ts) + val |= 1; + else + val &= ~1; break; case TCP_NOTSENT_LOWAT: val = READ_ONCE(tp->notsent_lowat); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5666f6137167..18b858597af4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -698,6 +698,8 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) u32 delta, delta_us; delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; + if (tp->tcp_usec_ts) + return delta; if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { if (!delta) @@ -2452,7 +2454,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, const struct sk_buff *skb) { return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && - tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(false, skb)); + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); } /* Nothing was retransmitted or returned timestamp is less @@ -7045,6 +7047,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; + tcp_rsk(req)->req_usec_ts = -1; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cdd65cc594bc..7583d4e34c8c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -296,6 +296,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; goto failure; } + tp->tcp_usec_ts = dst_tcp_usec_ts(&rt->dst); /* OK, now commit destination to socket. 
*/ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9fdba897a28..ace806c5bd0c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -300,6 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tw->tw_usec_ts = tp->tcp_usec_ts; tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; tw->tw_txhash = sk->sk_txhash; @@ -554,21 +555,29 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { + newtp->tcp_usec_ts = treq->req_usec_ts; newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { + newtp->tcp_usec_ts = 0; newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } if (req->num_timeout) { - newtp->undo_marker = treq->snt_isn; - newtp->retrans_stamp = div_u64(treq->snt_synack, - USEC_PER_SEC / TCP_TS_HZ); newtp->total_rto = req->num_timeout; + newtp->undo_marker = treq->snt_isn; + if (newtp->tcp_usec_ts) { + newtp->retrans_stamp = treq->snt_synack; + newtp->total_rto_time = (u32)(tcp_clock_us() - + newtp->retrans_stamp) / USEC_PER_MSEC; + } else { + newtp->retrans_stamp = div_u64(treq->snt_synack, + USEC_PER_SEC / TCP_TS_HZ); + newtp->total_rto_time = tcp_clock_ms() - + newtp->retrans_stamp; + } newtp->total_rto_recoveries = 1; - newtp->total_rto_time = tcp_clock_ms() - - newtp->retrans_stamp; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a1fec8be9ac3..2866ccbccde0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -799,7 +799,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp_ts(false, skb) + tp->tsoffset; + opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -884,7 +884,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp_ts(false, skb) + tcp_rsk(req)->ts_off; + opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + + tcp_rsk(req)->ts_off; opts->tsecr = READ_ONCE(req->ts_recent); remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -943,7 +944,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = skb ? tcp_skb_timestamp_ts(false, skb) + tp->tsoffset : 0; + opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -3379,7 +3381,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Save stamp of the first (attempted) retransmit. 
*/ if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp_ts(false, skb); + tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb); if (tp->undo_retrans < 0) tp->undo_retrans = 0; @@ -3665,6 +3667,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); + if (tcp_rsk(req)->req_usec_ts < 0) + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index bfcf3fe44c72..1f9f6c1c196b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -26,14 +26,18 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - u32 elapsed, start_ts, user_timeout; + const struct tcp_sock *tp = tcp_sk(sk); + u32 elapsed, user_timeout; s32 remaining; - start_ts = tcp_sk(sk)->retrans_stamp; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout) return icsk->icsk_rto; - elapsed = tcp_time_stamp_ts(tcp_sk(sk)) - start_ts; + + elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp; + if (tp->tcp_usec_ts) + elapsed /= USEC_PER_MSEC; + remaining = user_timeout - elapsed; if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ @@ -212,12 +216,13 @@ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { - unsigned int start_ts; + struct tcp_sock *tp = tcp_sk(sk); + unsigned int start_ts, delta; if (!inet_csk(sk)->icsk_retransmits) return false; - start_ts = tcp_sk(sk)->retrans_stamp; + start_ts = tp->retrans_stamp; if (likely(timeout == 0)) { unsigned int rto_base = TCP_RTO_MIN; @@ -226,7 +231,12 @@ static bool retransmits_timed_out(struct sock *sk, timeout = tcp_model_timeout(sk, boundary, rto_base); } - return (s32)(tcp_time_stamp_ts(tcp_sk(sk)) - start_ts - timeout) >= 0; + if (tp->tcp_usec_ts) { + /* delta maybe off up to a jiffy due to timer granularity. */ + delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1); + return (s32)(delta - timeout * USEC_PER_MSEC) >= 0; + } + return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0; } /* A write timeout has occurred. Process the after effects. 
*/ @@ -468,20 +478,18 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, - const struct sk_buff *skb) + const struct sk_buff *skb, + u32 rtx_delta) { const struct tcp_sock *tp = tcp_sk(sk); const int timeout = TCP_RTO_MAX * 2; - u32 rcv_delta, rtx_delta; + u32 rcv_delta; rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; - rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp_ts(tp) - - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb))); - - return rtx_delta > timeout; + return msecs_to_jiffies(rtx_delta) > timeout; } /** @@ -534,7 +542,11 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; - rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(false, skb)); + rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: + tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); + if (tp->tcp_usec_ts) + rtx_delta /= USEC_PER_MSEC; + if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), @@ -551,7 +563,7 @@ void tcp_retransmit_timer(struct sock *sk) rtx_delta); } #endif - if (tcp_rtx_probe0_timed_out(sk, skb)) { + if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) { tcp_write_err(sk); goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ee6517e9b2f..0c8a14ba104f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -286,6 +286,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } + tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { -- cgit v1.2.3 From a77a0f5c7f23a8a4981a2a3ff47baa91ceaf1f53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 12:57:48 +0000 Subject: tcp: add TCPI_OPT_USEC_TS Add the ability to report in tcp_info.tcpi_options if a flow is using usec resolution in TCP TS val. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index d1d08da6331a..8aa3916e14f6 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -170,6 +170,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ +#define TCPI_OPT_USEC_TS 64 /* usec timestamps */ /* * Sender's congestion state indicating normal or abnormal situations diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b961364b4961..a86d8200a1e8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3760,6 +3760,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN_SEEN; if (tp->syn_data_acked) info->tcpi_options |= TCPI_OPT_SYN_DATA; + if (tp->tcp_usec_ts) + info->tcpi_options |= TCPI_OPT_USEC_TS; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, -- cgit v1.2.3 From 6b398f1c28f033b82c7363caa73f5669ce4a1853 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 28 Sep 2023 17:35:22 +0300 Subject: wifi: mac80211: cleanup auth_data only if association continues If the association command fails then the authentication is still valid and it makes sense to keep it alive. Otherwise, we would currently get into an inconsistent state because mac80211 on the one hand is disconnected but on the other hand the state is not entirely cleared and a new authentication could not continue. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.c9855f46ebc8.I7f3dcd4120a186484a91b87560e9b7201d40984f@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d2514a9a6c4..3518c0808897 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7718,7 +7718,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, match = ether_addr_equal(ifmgd->auth_data->ap_addr, assoc_data->ap_addr) && ifmgd->auth_data->link_id == req->link_id; - ieee80211_destroy_auth_data(sdata, match); + + /* Cleanup is delayed if auth_data matches */ + if (!match) + ieee80211_destroy_auth_data(sdata, false); } /* prepare assoc data */ @@ -7941,11 +7944,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, run_again(sdata, assoc_data->timeout); + /* We are associating, clean up auth_data */ + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, true); + return 0; err_clear: - eth_zero_addr(sdata->deflink.u.mgd.bssid); - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - BSS_CHANGED_BSSID); + if (!ifmgd->auth_data) { + eth_zero_addr(sdata->deflink.u.mgd.bssid); + ieee80211_link_info_change_notify(sdata, &sdata->deflink, + BSS_CHANGED_BSSID); + } ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); -- cgit v1.2.3 From 822cab1987a0e028e38b60aecd98af0289b46e7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Sep 2023 17:35:24 +0300 Subject: wifi: mac80211: don't recreate driver link debugfs in reconfig We can delete any that we want to remove, but we can't recreate the links as they already exist. 
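As a condensed sketch of the resulting logic (all identifiers are taken from the diff below, nothing new is introduced): in drv_change_vif_links() the per-link driver debugfs entries are only added when the device is not in the middle of a HW restart, because in that case the entries created at link-setup time still exist:

        /* sketch only: skip re-adding driver link debugfs during reconfig */
        if (!local->in_reconfig) {
                for_each_set_bit(link_id, &links_to_add,
                                 IEEE80211_MLD_MAX_NUM_LINKS) {
                        link = rcu_access_pointer(sdata->link[link_id]);
                        ieee80211_link_debugfs_drv_add(link);
                }
        }
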
Fixes: 170cd6a66d9a ("wifi: mac80211: add netdev per-link debugfs data and driver hook") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.3d0214838421.I512a0ff86f631ff42bf25ea0cb2e8e8616794a94@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 169dbbca54b6..9112715a749a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -526,10 +526,13 @@ int drv_change_vif_links(struct ieee80211_local *local, if (ret) return ret; - for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { - link = rcu_access_pointer(sdata->link[link_id]); + if (!local->in_reconfig) { + for_each_set_bit(link_id, &links_to_add, + IEEE80211_MLD_MAX_NUM_LINKS) { + link = rcu_access_pointer(sdata->link[link_id]); - ieee80211_link_debugfs_drv_add(link); + ieee80211_link_debugfs_drv_add(link); + } } return 0; -- cgit v1.2.3 From a1f5dcb1c0c1e26a7e158ce9fc28355041f26909 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 28 Sep 2023 17:35:25 +0300 Subject: wifi: mac80211: add a driver callback to add vif debugfs Add a callback which the driver can use to add the vif debugfs. We used to have this back until commit d260ff12e776 ("mac80211: remove vif debugfs driver callbacks") where we thought that it will be easier to just add them during interface add/remove. However, now with multi-link, we want to have proper debugfs for drivers for multi-link where some files might be in the netdev for non-MLO connections, and in the links for MLO ones, so we need to do some reconstruction when switching the mode. Moving to this new call enables that and MLO drivers will have to use it for proper debugfs operation. Signed-off-by: Miri Korenblit Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.ac38913f6ab7.Iee731d746bb08fcc628fa776f337016a12dc62ac@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/driver-ops.c | 11 ++++++++--- net/mac80211/driver-ops.h | 23 +++++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 72375eceb786..a9b73e357462 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3869,6 +3869,10 @@ struct ieee80211_prep_tx_info { * the station. See @sta_pre_rcu_remove if needed. * This callback can sleep. * + * @vif_add_debugfs: Drivers can use this callback to add a debugfs vif + * directory with its files. This callback should be within a + * CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. + * * @link_add_debugfs: Drivers can use this callback to add debugfs files * when a link is added to a mac80211 vif. This callback should be within * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. 
@@ -4368,6 +4372,8 @@ struct ieee80211_ops { int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); #ifdef CONFIG_MAC80211_DEBUGFS + void (*vif_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); void (*link_add_debugfs)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 9112715a749a..08861ec61be9 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -72,10 +72,15 @@ int drv_add_interface(struct ieee80211_local *local, ret = local->ops->add_interface(&local->hw, &sdata->vif); trace_drv_return_int(local, ret); - if (ret == 0) - sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + if (ret) + return ret; - return ret; + sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + + if (!local->in_reconfig) + drv_vif_add_debugfs(local, sdata); + + return 0; } int drv_change_interface(struct ieee80211_local *local, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 77048b9065e6..e07e65da15ee 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -489,6 +489,23 @@ static inline void drv_sta_remove(struct ieee80211_local *local, } #ifdef CONFIG_MAC80211_DEBUGFS +static inline void drv_vif_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + WARN_ON(!sdata->vif.debugfs_dir)) + return; + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->vif_add_debugfs) + local->ops->vif_add_debugfs(&local->hw, &sdata->vif); +} + static inline void drv_link_add_debugfs(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, @@ -539,6 +556,12 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local, local->ops->link_sta_add_debugfs(&local->hw, &sdata->vif, link_sta, dir); } +#else +static inline void drv_vif_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); +} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, -- cgit v1.2.3 From c942398f95efb06d5434f86ea00fabe267f57af8 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 28 Sep 2023 17:35:26 +0300 Subject: wifi: mac80211: handle debugfs when switching to/from MLO In MLO, we have a per-link debugfs directory which contains the per-link files. In case of non-MLO we would like to put the per-link files in the netdev directory to keep it how it was before MLO. - Upon interface creation the netdev will be created with the per-link files in it. - Upon switching to MLO: delete the entire netdev directory and then recreate it without the per-link files. Then the per-link directories with the per-link files in it will be created in ieee80211_link_init() - Upon switching to non-MLO: delete the entire netdev directory (including the per-link directories) and recreate it with the per-link files in it. Note that this also aligns to always call the vif link debugfs method for the deflink as promised in the documentation, which wasn't done before. 
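A rough sketch of the resulting switching logic (identifiers as in the diffs below, not a new API): when a vif gains its first links it is recreated as an MLD vif without the per-link files, and when it drops back to zero links it is recreated with the per-link files in the netdev directory again:

        /* sketch: in ieee80211_vif_update_links(), around the link add/remove */
        if (!old_links)                 /* switching non-MLO -> MLO */
                ieee80211_debugfs_recreate_netdev(sdata, true);
        ...
        if (!new_links)                 /* switching MLO -> non-MLO */
                ieee80211_debugfs_recreate_netdev(sdata, false);
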
Signed-off-by: Miri Korenblit Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.082e698caca9.I5bef7b2026e0f58b4a958b3d1f459ac5baeccfc9@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 23 ++++++++++++++++++----- net/mac80211/debugfs_netdev.h | 15 ++++++++++++--- net/mac80211/driver-ops.c | 5 ++++- net/mac80211/iface.c | 2 +- net/mac80211/link.c | 5 +++++ 5 files changed, 40 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 14a40348959a..b383dad18841 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -934,18 +934,20 @@ static void add_link_files(struct ieee80211_link_data *link, } } -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) +void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif) { char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); + /* deflink also has this */ + sdata->deflink.debugfs_dir = sdata->vif.debugfs_dir; sdata->debugfs.subdir_stations = debugfs_create_dir("stations", sdata->vif.debugfs_dir); add_files(sdata); - - if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + if (!mld_vif) add_link_files(&sdata->deflink, sdata->vif.debugfs_dir); } @@ -973,11 +975,21 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); } +void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif) +{ + ieee80211_debugfs_remove_netdev(sdata); + ieee80211_debugfs_add_netdev(sdata, mld_vif); + drv_vif_add_debugfs(sdata->local, sdata); + if (!mld_vif) + ieee80211_link_debugfs_drv_add(&sdata->deflink); +} + void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) { char link_dir_name[10]; - if (WARN_ON(!link->sdata->vif.debugfs_dir)) + if (WARN_ON(!link->sdata->vif.debugfs_dir || link->debugfs_dir)) return; /* For now, this should not be called for non-MLO capable drivers */ @@ -1014,7 +1026,8 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) { - if (WARN_ON(!link->debugfs_dir)) + if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR || + WARN_ON(!link->debugfs_dir)) return; drv_link_add_debugfs(link->sdata->local, link->sdata, diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 99e688dcabd6..b226b1aae88a 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions: + * Copyright (C) 2023 Intel Corporation + */ /* routines exported for debugfs handling */ #ifndef __IEEE80211_DEBUGFS_NETDEV_H @@ -7,9 +11,12 @@ #include "ieee80211_i.h" #ifdef CONFIG_MAC80211_DEBUGFS -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif); void ieee80211_link_debugfs_add(struct ieee80211_link_data *link); void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); @@ -18,7 +25,7 @@ void 
ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else static inline void ieee80211_debugfs_add_netdev( - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, bool mld_vif) {} static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) @@ -26,7 +33,9 @@ static inline void ieee80211_debugfs_remove_netdev( static inline void ieee80211_debugfs_rename_netdev( struct ieee80211_sub_if_data *sdata) {} - +static inline void ieee80211_debugfs_recreate_netdev( + struct ieee80211_sub_if_data *sdata, bool mld_vif) +{} static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) {} static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 08861ec61be9..7938ec87ef25 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -77,8 +77,11 @@ int drv_add_interface(struct ieee80211_local *local, sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - if (!local->in_reconfig) + if (!local->in_reconfig) { drv_vif_add_debugfs(local, sdata); + /* initially vif is not MLD */ + ieee80211_link_debugfs_drv_add(&sdata->deflink); + } return 0; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 510f8aead4f9..124cc53f6b34 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1775,7 +1775,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* need to do this after the switch so vif.type is correct */ ieee80211_link_setup(&sdata->deflink); - ieee80211_debugfs_add_netdev(sdata); + ieee80211_debugfs_add_netdev(sdata, false); } static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 2a78374f6f04..76c61a132569 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -235,6 +235,9 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(sdata->vif.link_conf[link_id], NULL); } + if (!old_links) + ieee80211_debugfs_recreate_netdev(sdata, true); + /* link them into data structures */ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { WARN_ON(!use_deflink && @@ -261,6 +264,8 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, old_links & old_active, new_links & sdata->vif.active_links, old); + if (!new_links) + ieee80211_debugfs_recreate_netdev(sdata, false); } if (ret) { -- cgit v1.2.3 From 00f823b68ecee865bb2e4e6d5b7f4359eef0b3e3 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 28 Sep 2023 17:35:27 +0300 Subject: wifi: mac80211: Rename and update IEEE80211_VIF_DISABLE_SMPS_OVERRIDE EMLSR operation and SMPS operation cannot coexist. Thus, when EMLSR is enabled, all SMPS signaling towards the AP should be stopped (it is expected that the AP will consider SMPS to be off). Rename IEEE80211_VIF_DISABLE_SMPS_OVERRIDE to IEEE80211_VIF_EML_ACTIVE and use the flag as an indication from the driver that EMLSR is enabled. When EMLSR is enabled SMPS flows towards the AP MLD should be stopped. 
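As a minimal sketch of how the renamed flag is consumed (identifiers as in the diffs below): the driver sets IEEE80211_VIF_EML_ACTIVE in vif->driver_flags while EMLSR is active, and mac80211 then short-circuits SMPS requests so nothing is signalled to the AP MLD:

        /* sketch: early exit in __ieee80211_request_smps_mgd() */
        if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE)
                return 0;       /* EMLSR active: do not send SMPS frames to the AP */
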
Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.fb2c2f9a0645.If6df5357568abd623a081f0f33b07e63fb8bba99@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 6 +++--- include/net/mac80211.h | 6 +++--- net/mac80211/cfg.c | 6 ++++++ net/mac80211/debugfs_netdev.c | 5 ++++- 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 46e207211f21..6b4b32b5350b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -240,8 +240,8 @@ static int iwl_mvm_esr_mode_active(struct iwl_mvm *mvm, mvmvif->esr_active = true; - /* Disable SMPS overrideing by user */ - vif->driver_flags |= IEEE80211_VIF_DISABLE_SMPS_OVERRIDE; + /* Indicate to mac80211 that EML is enabled */ + vif->driver_flags |= IEEE80211_VIF_EML_ACTIVE; iwl_mvm_update_smps_on_active_links(mvm, vif, IWL_MVM_SMPS_REQ_FW, IEEE80211_SMPS_OFF); @@ -399,7 +399,7 @@ static int iwl_mvm_esr_mode_inactive(struct iwl_mvm *mvm, mvmvif->esr_active = false; - vif->driver_flags &= ~IEEE80211_VIF_DISABLE_SMPS_OVERRIDE; + vif->driver_flags &= ~IEEE80211_VIF_EML_ACTIVE; iwl_mvm_update_smps_on_active_links(mvm, vif, IWL_MVM_SMPS_REQ_FW, IEEE80211_SMPS_AUTOMATIC); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a9b73e357462..7dae9aac089c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1767,15 +1767,15 @@ struct ieee80211_channel_switch { * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes * and send P2P_PS notification to the driver if NOA changed, even * this is not pure P2P vif. - * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of - * SMPS mode via debugfs. + * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is + * enabled for the interface. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), - IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4), + IEEE80211_VIF_EML_ACTIVE = BIT(4), }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5cec0c251e86..606b1b2e4123 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3158,6 +3158,12 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; + /* The driver indicated that EML is enabled for the interface, which + * implies that SMPS flows towards the AP should be stopped. + */ + if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) + return 0; + if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index b383dad18841..ec91e131b29e 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -323,7 +323,10 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; - if (sdata->vif.driver_flags & IEEE80211_VIF_DISABLE_SMPS_OVERRIDE) + /* The driver indicated that EML is enabled for the interface, thus do + * not allow to override the SMPS state. 
+ */ + if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return -EOPNOTSUPP; if (!(local->hw.wiphy->features & NL80211_FEATURE_STATIC_SMPS) && -- cgit v1.2.3 From 256caff27874c40c6f02f3e047e47bf4ae7702bc Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 28 Sep 2023 17:35:28 +0300 Subject: wifi: cfg80211: Include operating class 137 in 6GHz band Draft P802.11be_D3.1 added operating class to describe 320 MHz operation in the 6GHz band. Include this new operating class in ieee80211_operating_class_to_band(). Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.bed4a007d81b.I3eb4b8fe39c0c1a988c98a103b11a9f45a92b038@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 213c9405e645..0893b7f57832 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1999,6 +1999,7 @@ bool ieee80211_operating_class_to_band(u8 operating_class, *band = NL80211_BAND_5GHZ; return true; case 131 ... 135: + case 137: *band = NL80211_BAND_6GHZ; return true; case 81: -- cgit v1.2.3 From c00de1c49294cb83ecf11c2f9306df5fec5c16a0 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 28 Sep 2023 17:35:29 +0300 Subject: wifi: mac80211: mesh: fix some kdoc warnings These were mostly missing or incorrectly tagged return values. Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.33fea2968c62.I41d197b570370ab7cad1405518512fdd36e08717@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 8 ++++++-- net/mac80211/mesh_hwmp.c | 2 ++ net/mac80211/mesh_pathtbl.c | 20 +++++++++++++------- net/mac80211/mesh_plink.c | 6 +++++- net/mac80211/mesh_ps.c | 6 +++++- net/mac80211/mesh_sync.c | 4 +++- 6 files changed, 34 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 092a1dc7314d..fccbcde3359a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -56,6 +56,8 @@ static void ieee80211_mesh_housekeeping_timer(struct timer_list *t) * * This function checks if the mesh configuration of a mesh point matches the * local mesh configuration, i.e. if both nodes belong to the same mesh network. + * + * Returns: %true if both nodes belong to the same mesh */ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie) @@ -119,6 +121,8 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links * * @ie: information elements of a management frame from the mesh peer + * + * Returns: %true if the mesh peer is willing to establish peer links */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { @@ -858,7 +862,7 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, * @meshsa: source address in the mesh. Same as TA, as frame is * locally originated. * - * Return the length of the 802.11 (does not include a mesh control header) + * Returns: the length of the 802.11 frame header (excludes mesh control header) */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *meshda, const u8 *meshsa) @@ -891,7 +895,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * @addr6: 2nd address in the ae header, which corresponds to addr6 of the * mesh frame * - * Return the header length. 
+ * Returns: the header length */ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, struct ieee80211s_hdr *meshhdr, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 51369072984e..775d52561c54 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -230,6 +230,8 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. + * + * Returns: 0 on success */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 3e52aaa57b1f..8a3f44ce3e04 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. + * Copyright (C) 2023 Intel Corporation * Author: Luis Carlos Cobo */ @@ -173,6 +174,11 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, /** * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * + * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) + * @from_mpath: The failed mpath + * @copy: When true, copy all the frames to the new mpath queue. When false, + * move them. + * * This function is used to transfer or copy frames from an unresolved mpath to * a gate mpath. The function also adds the Address Extension field and * updates the next hop. @@ -181,11 +187,6 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, * destination addresses are updated. * * The gate mpath must be an active mpath with a valid mpath->next_hop. - * - * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) - * @from_mpath: The failed mpath - * @copy: When true, copy all the frames to the new mpath queue. When false, - * move them. */ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, @@ -330,6 +331,8 @@ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) /** * mesh_path_add_gate - add the given mpath to a mesh gate to our path table * @mpath: gate path to add to table + * + * Returns: 0 on success, -EEXIST */ int mesh_path_add_gate(struct mesh_path *mpath) { @@ -388,6 +391,8 @@ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) /** * mesh_gate_num - number of gates known to this interface * @sdata: subif data + * + * Returns: The number of gates */ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) { @@ -861,10 +866,9 @@ static void table_flush_by_iface(struct mesh_table *tbl) /** * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface * - * This function deletes both mesh paths as well as mesh portal paths. - * * @sdata: interface data to match * + * This function deletes both mesh paths as well as mesh portal paths. */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { @@ -944,6 +948,8 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * queue to that gate's queue. If there are more than one gates, the frames * are copied from each gate to the next. After frames are copied, the * mpath queues are emptied onto the transmission queue. 
+ * + * Returns: 0 on success, -EHOSTUNREACH */ int mesh_path_send_to_gates(struct mesh_path *mpath) { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a1e526419e9d..dbabeefe4515 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -153,6 +153,8 @@ out: * selected if any non-HT peers are present in our MBSS. 20MHz-protection mode * is selected if all peers in our 20/40MHz MBSS support HT and at least one * HT20 peer is present. Otherwise no-protection mode is selected. + * + * Returns: BSS_CHANGED_HT or 0 for no change */ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { @@ -362,7 +364,7 @@ free: * Mesh paths with this peer as next hop should be flushed * by the caller outside of plink_lock. * - * Returns beacon changed flag if the beacon content changed. + * Returns: beacon changed flag if the beacon content changed. * * Locking: the caller must hold sta->mesh->plink_lock */ @@ -390,6 +392,8 @@ static u64 __mesh_plink_deactivate(struct sta_info *sta) * @sta: mesh peer link to deactivate * * All mesh paths with this peer as next hop will be flushed + * + * Returns: beacon changed flag if the beacon content changed. */ u64 mesh_plink_deactivate(struct sta_info *sta) { diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 35eacca43e49..20e022a03933 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -15,6 +15,8 @@ /** * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave * @sta: the station to get the frame for + * + * Returns: A newly allocated SKB */ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) { @@ -77,6 +79,8 @@ static void mps_qos_null_tx(struct sta_info *sta) * * sets the non-peer power mode and triggers the driver PS (re-)configuration * Return BSS_CHANGED_BEACON if a beacon update is necessary. + * + * Returns: BSS_CHANGED_BEACON if a beacon update is in order. */ u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) { @@ -147,7 +151,7 @@ u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) * * @sta: mesh STA * @pm: the power mode to set - * Return BSS_CHANGED_BEACON if a beacon update is in order. + * Returns: BSS_CHANGED_BEACON if a beacon update is in order. */ u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, enum nl80211_mesh_power_mode pm) diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 9e342cc2504c..8cf3f395f52f 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -3,7 +3,7 @@ * Copyright 2011-2012, Pavel Zubarev * Copyright 2011-2012, Marco Porsch * Copyright 2011-2012, cozybit Inc. 
- * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021,2023 Intel Corporation */ #include "ieee80211_i.h" @@ -37,6 +37,8 @@ struct sync_method { * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT * * @cfg: mesh config element from the mesh peer (or %NULL) + * + * Returns: If the mesh peer is currently adjusting its TBTT */ static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) { -- cgit v1.2.3 From 0fca7784b7a14d4ede64f479662afb98876ec7f8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 28 Sep 2023 17:35:30 +0300 Subject: wifi: cfg80211: Handle specific BSSID in 6GHz scanning When the scan parameters for a 6GHz scan specify a unicast BSSID address, and the corresponding AP is found in the scan list, add a corresponding entry in the collocated AP list, so this AP would be directly probed even if it was not advertised as a collocated AP. This is needed for handling a scan request that is intended for a ML probe flow, where user space can requests a scan to retrieve information for other links in the AP MLD. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.54b954bc02ad.I1c072793d3d77a4c8fbbc64b4db5cce1bbb00382@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8d114faf4842..6c2acd3fa36a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -830,10 +830,47 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) list_for_each_entry(intbss, &rdev->bss_list, list) { struct cfg80211_bss *res = &intbss->pub; const struct cfg80211_bss_ies *ies; + const struct element *ssid_elem; + struct cfg80211_colocated_ap *entry; + u32 s_ssid_tmp; + int ret; ies = rcu_access_pointer(res->ies); count += cfg80211_parse_colocated_ap(ies, &coloc_ap_list); + + /* In case the scan request specified a specific BSSID + * and the BSS is found and operating on 6GHz band then + * add this AP to the collocated APs list. + * This is relevant for ML probe requests when the lower + * band APs have not been discovered. + */ + if (is_broadcast_ether_addr(rdev_req->bssid) || + !ether_addr_equal(rdev_req->bssid, res->bssid) || + res->channel->band != NL80211_BAND_6GHZ) + continue; + + ret = cfg80211_calc_short_ssid(ies, &ssid_elem, + &s_ssid_tmp); + if (ret) + continue; + + entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, + GFP_ATOMIC); + + if (!entry) + continue; + + memcpy(entry->bssid, res->bssid, ETH_ALEN); + entry->short_ssid = s_ssid_tmp; + memcpy(entry->ssid, ssid_elem->data, + ssid_elem->datalen); + entry->ssid_len = ssid_elem->datalen; + entry->short_ssid_valid = true; + entry->center_freq = res->channel->center_freq; + + list_add_tail(&entry->list, &coloc_ap_list); + count++; } spin_unlock_bh(&rdev->bss_lock); } -- cgit v1.2.3 From e7182c4e6bbeafa272612e6c06fa92b42ad107ad Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 28 Sep 2023 17:35:31 +0300 Subject: wifi: mac80211: Fix setting vif links When setting the interface links, ignore the change iff both the valid links and the dormant links did not change. This is needed to support cases where the valid links didn't change but the dormant links did. 
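A minimal standalone sketch of the corrected condition (hypothetical helper name, not the mac80211 code itself):

#include <stdbool.h>
#include <stdint.h>

/*
 * The link update may only be skipped when *both* bitmaps are unchanged.
 * Comparing only the valid links, as before this fix, silently drops
 * updates that merely toggle dormant links.
 */
static bool link_update_is_noop(uint16_t old_valid, uint16_t new_valid,
                                uint16_t old_dormant, uint16_t new_dormant)
{
        return old_valid == new_valid && old_dormant == new_dormant;
}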
Fixes: 6d543b34dbcf ("wifi: mac80211: Support disabled links during association") Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.0357b6306587.I7dbfec347949b629fea680d246a650d6207ff217@changeid Signed-off-by: Johannes Berg --- net/mac80211/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 76c61a132569..bf7bd880d062 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -195,7 +195,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, memset(to_free, 0, sizeof(links)); - if (old_links == new_links) + if (old_links == new_links && dormant_links == sdata->vif.dormant_links) return 0; /* if there were no old links, need to clear the pointers to deflink */ -- cgit v1.2.3 From 271d14b37fa5f2f9bd9e22711c3ba6b1532c8de1 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 28 Sep 2023 17:35:35 +0300 Subject: wifi: mac80211: make mgd_protect_tdls_discover MLO-aware Since userspace can choose now what link to establish the TDLS on, we should know on what channel to do session protection. Add a link id parameter to this callback. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.ef12ce3eb835.If864f406cfd9e24f36a2b88fd13a37328633fcf9@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/tdls.c | 5 +++-- include/net/mac80211.h | 3 ++- net/mac80211/driver-ops.h | 8 ++++++-- net/mac80211/tdls.c | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 66d9de0f1511..74cb2f863472 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2345,7 +2345,8 @@ void iwl_mvm_teardown_tdls_peers(struct iwl_mvm *mvm); void iwl_mvm_recalc_tdls_state(struct iwl_mvm *mvm, struct ieee80211_vif *vif, bool sta_added); void iwl_mvm_mac_mgd_protect_tdls_discover(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); int iwl_mvm_tdls_channel_switch(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u8 oper_class, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c index dae6f2a1aad9..fac992af3ddb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020, 2022 Intel Corporation + * Copyright (C) 2018-2020, 2022-2023 Intel Corporation */ #include #include "mvm.h" @@ -144,7 +144,8 @@ void iwl_mvm_recalc_tdls_state(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } void iwl_mvm_mac_mgd_protect_tdls_discover(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); u32 duration = 2 * vif->bss_conf.dtim_period * vif->bss_conf.beacon_int; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7dae9aac089c..0ce5b0831884 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4522,7 +4522,8 @@ struct ieee80211_ops { struct ieee80211_prep_tx_info *info); void 
(*mgd_protect_tdls_discover)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); int (*add_chanctx)(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index e07e65da15ee..d92de4cd960b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -955,7 +955,8 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local, static inline void drv_mgd_protect_tdls_discover(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, + int link_id) { might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); @@ -964,9 +965,12 @@ drv_mgd_protect_tdls_discover(struct ieee80211_local *local, return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + link_id = link_id > 0 ? link_id : 0; + trace_drv_mgd_protect_tdls_discover(local, sdata); if (local->ops->mgd_protect_tdls_discover) - local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif); + local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif, + link_id); trace_drv_return_void(local); } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f3fd66d30b84..05a7dff69fe9 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1318,7 +1318,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, * response frame. It is transmitted directly and not buffered * by the AP. */ - drv_mgd_protect_tdls_discover(sdata->local, sdata); + drv_mgd_protect_tdls_discover(sdata->local, sdata, link_id); fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: -- cgit v1.2.3 From 9ad08fb1bcfdebfe71f9485affacfc24dd1b486b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 28 Sep 2023 17:35:36 +0300 Subject: wifi: mac80211: fix a expired vs. cancel race in roc When the remain on channel is removed at the time it should have expired, we have a race: the driver could be handling the flow of the expiration while mac80211 is cancelling that very same remain on channel request. This wouldn't be problem in itself, but since mac80211 can send the next request to the driver in the cancellation flow, we can get to the following situation: CPU0 CPU1 expiration of roc in driver ieee80211_remain_on_channel_expired() Cancellation of the roc schedules a worker (hw_roc_done) Add next roc hw_roc_done_wk runs and ends the second roc prematurely. Since, by design, there is only one single request sent to the driver at a time, we can safely assume that after the cancel() request returns from the driver, we should not handle any worker that handles the expiration of the request. Cancel the hw_roc_done worker after the cancellation to make sure we start the next one with a clean slate. 
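The required ordering can be modelled with a tiny standalone program (hypothetical names only; nothing below is the actual mac80211 or driver code):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the race: a "done" notification queued for the cancelled
 * request must be discarded before the next request starts, otherwise the
 * stale work item completes the new request prematurely. */

static bool done_work_pending;
static int  current_request;

static void roc_expired_in_driver(void) { done_work_pending = true; }
static void cancel_roc(void)            { done_work_pending = false; /* the fix */ }
static void start_roc(int id)           { current_request = id; }

static void run_done_work(void)
{
        if (done_work_pending) {
                printf("request %d reported as finished\n", current_request);
                done_work_pending = false;
        }
}

int main(void)
{
        start_roc(1);
        roc_expired_in_driver();   /* CPU0: driver reports expiry of request 1 */
        cancel_roc();              /* CPU1: cancellation; stale work reaped here */
        start_roc(2);
        run_done_work();           /* without the fix, request 2 would be ended here */
        return 0;
}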
Signed-off-by: Emmanuel Grumbach Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.4e4469be20ac.Iab0525f5cc4698acf23eab98b8b1eec02099cde0@changeid Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 071582dbe6a5..6c4080202573 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -717,6 +717,23 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, return ret; } + /* + * We could be racing against the notification from the driver: + * + driver is handling the notification on CPU0 + * + user space is cancelling the remain on channel and + * schedules the hw_roc_done worker. + * + * Now hw_roc_done might start to run after the next roc will + * start and mac80211 will think that this second roc has + * ended prematurely. + * Cancel the work to make sure that all the pending workers + * have completed execution. + * Note that this assumes that by the time the driver returns + * from drv_cancel_remain_on_channel, it has completed all + * the processing of related notifications. + */ + wiphy_work_cancel(local->hw.wiphy, &local->hw_roc_done); + /* TODO: * if multiple items were combined here then we really shouldn't * cancel them all - we should wait for as much time as needed -- cgit v1.2.3 From c7d91ccb442538fb75a55ac55b44a00d5bef2841 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Sep 2023 17:35:37 +0300 Subject: wifi: cfg80211: wext: convert return value to kernel-doc Since I'm getting a warning here right now, fix the kernel-doc to be "Returns:" rather than just writing that out in the doc paragraph. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.ab3b9274bf07.If263f9f6726d6ad4661f8603e6a4485e0385d67f@changeid Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d23ce088bffa..2371069f3c43 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation */ #include @@ -227,7 +227,7 @@ EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); * cfg80211_wext_freq - get wext frequency for non-"auto" * @freq: the wext freq encoding * - * Returns a frequency, or a negative error code, or 0 for auto. + * Returns: a frequency, or a negative error code, or 0 for auto. */ int cfg80211_wext_freq(struct iw_freq *freq) { -- cgit v1.2.3 From 3831f6d8ce9c3c237a561219a2fb9c41ec800331 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 28 Sep 2023 17:35:38 +0300 Subject: wifi: mac80211: purge TX queues in flush_queues flow When this flow is invoked with the "drop" parameter as true, we only drop the frames from the hw queues, but not from the sw queues. So when we call wake_queues() after hw queue purging, all the frames from the sw queues will be TX'ed, when what we actually want to do is to purge all queues in order to not TX anything... This can cause, for example, TXing data frames to the peer after the deauth frame was sent. Fix this by purging the sw queues in addition to the hw queues if the drop parameter is true. 
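A small standalone model of the problem (hypothetical names, not mac80211 code):

#include <stdbool.h>
#include <stdio.h>

/* Frames parked in software queues must be purged before the queues are
 * woken again, otherwise a "flush with drop" still transmits them
 * afterwards (e.g. data frames after a deauth). */

static int hw_queued = 2;
static int sw_queued = 3;

static void flush_queues(bool drop)
{
        if (drop)
                sw_queued = 0;  /* the fix: purge the software queues too */
        hw_queued = 0;          /* the driver flush only empties hw queues */
}

static void wake_queues(void)
{
        printf("transmitting %d frames that were still queued\n", sw_queued);
}

int main(void)
{
        flush_queues(true);     /* drop everything, e.g. right after a deauth */
        wake_queues();          /* prints 0 with the fix, 3 without it */
        return 0;
}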
Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.8fc2ee23e56f.I8b3f6def9c28ea96261e2d31df8786986fb5385b@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/sta_info.c | 28 ++++++++++++++++++---------- net/mac80211/util.c | 13 +++++++++++++ 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e92eaf835ee0..70b6870fe5b4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2390,6 +2390,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct txq_info *txq, int tid); void ieee80211_txq_purge(struct ieee80211_local *local, struct txq_info *txqi); +void ieee80211_purge_sta_txqs(struct sta_info *sta); void ieee80211_txq_remove_vlan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ba36fc29e532..450700173422 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -113,6 +113,23 @@ static int link_sta_info_hash_del(struct ieee80211_local *local, &link_sta->link_hash_node, link_sta_rht_params); } +void ieee80211_purge_sta_txqs(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->sdata->local; + int i; + + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi; + + if (!sta->sta.txq[i]) + continue; + + txqi = to_txq_info(sta->sta.txq[i]); + + ieee80211_txq_purge(local, txqi); + } +} + static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; @@ -139,16 +156,7 @@ static void __cleanup_single_sta(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); } - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi; - - if (!sta->sta.txq[i]) - continue; - - txqi = to_txq_info(sta->sta.txq[i]); - - ieee80211_txq_purge(local, txqi); - } + ieee80211_purge_sta_txqs(sta); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 98a3bffc6991..b6be18710441 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -693,6 +693,19 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH, false); + if (drop) { + struct sta_info *sta; + + /* Purge the queues, so the frames on them won't be + * sent during __ieee80211_wake_queue() + */ + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ieee80211_purge_sta_txqs(sta); + } + } + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, -- cgit v1.2.3 From 06d6af4e1223339bb597b02fa8ad3f979ddb5511 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Sep 2023 17:35:39 +0300 Subject: wifi: mac80211: flush STA queues on unauthorization When the station is marked as no longer authorized, we shouldn't transmit to it any longer, but in particular we shouldn't be able to transmit to it after removing keys, which might lead to frames being sent out unencrypted depending on the exact hardware offload mechanism. Thus, instead of flushing only on station destruction, which covers only some cases, always flush on unauthorization. 
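A standalone sketch of the window being closed (hypothetical names, not mac80211 code):

#include <stdio.h>

/* With crypto offload, frames still queued for a station when its key is
 * removed can leave the hardware without encryption.  Flushing when the
 * station loses authorization closes that window on every path that later
 * removes keys, not only on station destruction. */

static int frames_queued = 4;

static void station_unauthorized(void)
{
        frames_queued = 0;      /* the fix: drain pending traffic while the key still exists */
}

static void remove_station_keys(void)
{
        if (frames_queued)
                printf("%d frames could now leave the hardware unencrypted\n",
                       frames_queued);
        else
                printf("nothing pending, nothing can leak\n");
}

int main(void)
{
        station_unauthorized();
        remove_station_keys();
        return 0;
}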
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.d47f528829e7.I96903652c7ee0c5c66891f8b2364383da8e45a1f@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 450700173422..0ba613dd1cc4 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1275,6 +1275,8 @@ static int _sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state, bool recalc) { + struct ieee80211_local *local = sta->local; + might_sleep(); if (sta->sta_state == new_state) @@ -1350,6 +1352,24 @@ static int _sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + + /* + * If we have encryption offload, flush (station) queues + * (after ensuring concurrent TX completed) so we won't + * transmit anything later unencrypted if/when keys are + * also removed, which might otherwise happen depending + * on how the hardware offload works. + */ + if (local->ops->set_key) { + synchronize_net(); + if (local->ops->flush_sta) + drv_flush_sta(local, sta->sdata, sta); + else + ieee80211_flush_queues(local, + sta->sdata, + false); + } + ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); } @@ -1415,18 +1435,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) WARN_ON_ONCE(ret); } - /* Flush queues before removing keys, as that might remove them - * from hardware, and then depending on the offload method, any - * frames sitting on hardware queues might be sent out without - * any encryption at all. - */ - if (local->ops->set_key) { - if (local->ops->flush_sta) - drv_flush_sta(local, sta->sdata, sta); - else - ieee80211_flush_queues(local, sta->sdata, false); - } - /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); -- cgit v1.2.3 From e433304ab437a6edff6b666246f7251d9a596b91 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 4 Oct 2023 12:12:02 +0300 Subject: wifi: mac80211: Check if we had first beacon with relevant links If there is a disassoc before the fisrt beacon we need to protect a session for the deauth frame. Currently we are checking if we had a beacon in the default link, which is wrong in a MLO connection and link id != 0. Fix this by checking all the active links, if none had a beacon then protect a session. If at least one link had a beacon there is no need for session protection. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231004120820.d290f0ab77b0.Ic1505cf3d60f74580d31efa7e52046947c490b85@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3518c0808897..e71c5129cc8b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2936,9 +2936,20 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * deauthentication frame by calling mgd_prepare_tx, if the * driver requested so. 
*/ - if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) && - !sdata->deflink.u.mgd.have_beacon) { - drv_mgd_prepare_tx(sdata->local, sdata, &info); + if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) { + for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], + sdata); + if (!link) + continue; + if (link->u.mgd.have_beacon) + break; + } + if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) + drv_mgd_prepare_tx(sdata->local, sdata, &info); } ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, -- cgit v1.2.3 From e76f3b4a73ea60ef098c5762b2aef4d11e094a04 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 28 Sep 2023 17:35:34 +0300 Subject: wifi: mac80211: add link id to mgd_prepare_tx() As we are moving to MLO and links terms, also the airtime protection will be done for a link rather than for a vif. Thus, some drivers will need to know for which link to protect airtime. Add link id as a parameter to the mgd_prepare_tx() callback. Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230928172905.c7fc59a6780b.Ic88a5037d31e184a2dce0b031ece1a0a93a3a9da@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/driver-ops.h | 1 + net/mac80211/mlme.c | 9 ++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0ce5b0831884..c839a04ad9db 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3641,11 +3641,14 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @link_id: the link id on which the frame will be TX'ed. + * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; u8 success:1; + int link_id; }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d92de4cd960b..568633b38c47 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -928,6 +928,7 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + info->link_id = info->link_id < 0 ? 
0 : info->link_id; trace_drv_mgd_prepare_tx(local, sdata, info->duration, info->subtype, info->success); if (local->ops->mgd_prepare_tx) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e71c5129cc8b..54a9f6db799e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1584,6 +1584,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ifmgd->assoc_req_ies_len = pos - ie_start; + info.link_id = assoc_data->assoc_link_id; drv_mgd_prepare_tx(local, sdata, &info); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -2948,8 +2949,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, if (link->u.mgd.have_beacon) break; } - if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) + if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) { + info.link_id = ffs(sdata->vif.active_links) - 1; drv_mgd_prepare_tx(sdata->local, sdata, &info); + } } ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, @@ -3577,6 +3580,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, u32 tx_flags = 0; struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, + .link_id = auth_data->link_id, }; pos = mgmt->u.auth.variable; @@ -6569,6 +6573,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) if (auth_data->algorithm == WLAN_AUTH_SAE) info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); + info.link_id = auth_data->link_id; drv_mgd_prepare_tx(local, sdata, &info); sdata_info(sdata, "send auth to %pM (try %d/%d)\n", @@ -7989,6 +7994,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); + info.link_id = ifmgd->auth_data->link_id; drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, @@ -8009,6 +8015,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); + info.link_id = ifmgd->assoc_data->assoc_link_id; drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, -- cgit v1.2.3 From 89141f965325de9aac3805cd506df788179b2809 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 4 Oct 2023 18:30:29 +0300 Subject: wifi: remove unused argument of ieee80211_get_tdls_action() Remove unused 'hdr_size' argument of 'ieee80211_get_tdls_action()' and adjust 'ieee80211_report_used_skb()' accordingly. Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20231004153032.206134-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +-- net/mac80211/status.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 62b4469c6866..045a776ee547 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4490,12 +4490,11 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, /** * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) * @skb: the skb containing the frame, length will not be checked - * @hdr_size: the size of the ieee80211_hdr that starts at skb->data * * This function assumes the frame is a data frame, and that the network header * is in the correct place. 
*/ -static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) +static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { diff --git a/net/mac80211/status.c b/net/mac80211/status.c index f67eafada741..807cdab38d5e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -731,12 +731,9 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (!sdata) { skb->dev = NULL; } else if (!dropped) { - unsigned int hdr_size = - ieee80211_hdrlen(hdr->frame_control); - /* Check to see if packet is a TDLS teardown packet */ if (ieee80211_is_data(hdr->frame_control) && - (ieee80211_get_tdls_action(skb, hdr_size) == + (ieee80211_get_tdls_action(skb) == WLAN_TDLS_TEARDOWN)) { ieee80211_tdls_td_tx_handle(local, sdata, skb, info->flags); -- cgit v1.2.3 From 9118796dfa67a58d17281e019acab4f651eb8dfa Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 9 Oct 2023 11:59:41 -0600 Subject: wifi: mac80211: Add __counted_by for struct ieee802_11_elems and use struct_size() Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). While there, use struct_size() helper, instead of the open-coded version, to calculate the size for the allocation of the whole flexible structure including, of course, the flexible-array member. This code was found with the help of Coccinelle, and audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/ZSQ/jcmTAf/PKHg/@work Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/util.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 70b6870fe5b4..84df104f272b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1746,7 +1746,7 @@ struct ieee802_11_elems { */ size_t scratch_len; u8 *scratch_pos; - u8 scratch[]; + u8 scratch[] __counted_by(scratch_len); }; static inline struct ieee80211_local *hw_to_local( diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b6be18710441..ed680120d5a7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1625,7 +1625,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) int nontransmitted_profile_len = 0; size_t scratch_len = 3 * params->len; - elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC); + elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC); if (!elems) return NULL; elems->ie_start = params->start; -- cgit v1.2.3 From 74a7c93f45abba538914a65dd2ef2ea7cf7150e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Oct 2023 12:34:47 +0200 Subject: wifi: mac80211: fix change_address deadlock during unregister When using e.g. bonding, and doing a sequence such as # iw wlan0 set type __ap # ip link add name bond1 type bond # ip link set wlan0 master bond1 # iw wlan0 interface del we deadlock, since the wlan0 interface removal will cause bonding to reset the MAC address of wlan0. The locking would be somewhat difficult to fix, but since this only happens during removal, we can simply ignore the MAC address change at this time. 
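In essence (stand-in types for illustration; only the check on the registered flag mirrors the actual change):

#include <stdbool.h>

/* Simplified stand-in types; the real check is dev->ieee80211_ptr->registered. */
struct wireless_dev { bool registered; };
struct net_device   { struct wireless_dev *ieee80211_ptr; };

static int locked_change_mac(struct net_device *dev) { (void)dev; return 0; }

/* A MAC change arriving while the wireless dev is already unregistering is
 * acknowledged as a no-op, so the problematic locking is never entered. */
static int change_mac(struct net_device *dev)
{
        if (!dev->ieee80211_ptr->registered)
                return 0;       /* device is going away; nothing to do */
        return locked_change_mac(dev);
}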
Reported-by: syzbot+25b3a0b24216651bc2af@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20231012123447.9f9d7fd1f237.Ic3a5ef4391b670941a69cec5592aefc79d9c2890@changeid Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 124cc53f6b34..e4e7c0b38cb6 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -298,6 +298,14 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) struct ieee80211_local *local = sdata->local; int ret; + /* + * This happens during unregistration if there's a bond device + * active (maybe other cases?) and we must get removed from it. + * But we really don't care anymore if it's not registered now. + */ + if (!dev->ieee80211_ptr->registered) + return 0; + wiphy_lock(local->hw.wiphy); ret = _ieee80211_change_mac(sdata, addr); wiphy_unlock(local->hw.wiphy); -- cgit v1.2.3 From 2703bc8513996e848b5aefa2deb1ff3baae5d79b Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 12 Oct 2023 14:42:28 +0300 Subject: wifi: mac80211: rename ieee80211_tx_status() to ieee80211_tx_status_skb() make htmldocs warns: Documentation/driver-api/80211/mac80211:109: ./include/net/mac80211.h:5170: WARNING: Duplicate C declaration, also defined at mac80211:1117. Declaration is '.. c:function:: void ieee80211_tx_status (struct ieee80211_hw *hw, struct sk_buff *skb)'. This is because there's a function named ieee80211_tx_status() and a struct named ieee80211_tx_status. This has been discussed previously but no solution found: https://lore.kernel.org/all/20220521114629.6ee9fc06@coco.lan/ There's also a bug open for three years with no solution in sight: https://github.com/sphinx-doc/sphinx/pull/8313 So I guess we have no other solution than to a workaround this in the code, for example to rename the function to ieee80211_tx_status_skb() to avoid the name conflict. I got the idea for the name from ieee80211_tx_status_noskb() in which the skb is not provided as an argument, instead with ieee80211_tx_status_skb() the skb is provided. Compile tested only. 
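Abridged declarations showing the clash (signatures taken from this patch, everything else elided):

struct ieee80211_hw;
struct sk_buff;

/* Legal C: struct tags and function names live in different namespaces,
 * but the documentation tooling indexes both under the bare identifier
 * "ieee80211_tx_status" and then reports a duplicate declaration. */
struct ieee80211_tx_status;                     /* the status descriptor     */
void ieee80211_tx_status(struct ieee80211_hw *hw,
                         struct sk_buff *skb);  /* the old callback name     */

/* After the rename the two documented symbols no longer collide: */
void ieee80211_tx_status_skb(struct ieee80211_hw *hw,
                             struct sk_buff *skb);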
Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231012114229.2931808-2-kvalo@kernel.org Signed-off-by: Johannes Berg --- Documentation/driver-api/80211/mac80211.rst | 2 +- drivers/net/wireless/ath/ath12k/dp_tx.c | 4 ++-- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- drivers/net/wireless/broadcom/b43/dma.c | 4 ++-- drivers/net/wireless/broadcom/b43/pio.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/tx.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 4 ++-- drivers/net/wireless/mediatek/mt7601u/tx.c | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00dev.c | 2 +- drivers/net/wireless/st/cw1200/txrx.c | 2 +- drivers/net/wireless/ti/wl1251/tx.c | 6 +++--- include/net/mac80211.h | 30 +++++++++++++------------- net/mac80211/main.c | 2 +- net/mac80211/status.c | 4 ++-- 16 files changed, 37 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/Documentation/driver-api/80211/mac80211.rst b/Documentation/driver-api/80211/mac80211.rst index 67d2e58b45e4..e38a220401f5 100644 --- a/Documentation/driver-api/80211/mac80211.rst +++ b/Documentation/driver-api/80211/mac80211.rst @@ -120,7 +120,7 @@ functions/definitions ieee80211_rx ieee80211_rx_ni ieee80211_rx_irqsafe - ieee80211_tx_status + ieee80211_tx_status_skb ieee80211_tx_status_ni ieee80211_tx_status_irqsafe ieee80211_rts_get diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index f5e0f5426226..492ca6ce6714 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -401,7 +401,7 @@ ath12k_dp_tx_htt_tx_complete_buf(struct ath12k_base *ab, } } - ieee80211_tx_status(ar->hw, msdu); + ieee80211_tx_status_skb(ar->hw, msdu); } static void @@ -498,7 +498,7 @@ static void ath12k_dp_tx_complete_msdu(struct ath12k *ar, * Might end up reporting it out-of-band from HTT stats. 
*/ - ieee80211_tx_status(ar->hw, msdu); + ieee80211_tx_status_skb(ar->hw, msdu); exit: rcu_read_unlock(); diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 597d1f916dfd..9f534ed2fbb3 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1770,7 +1770,7 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb, ah->stats.antenna_tx[0]++; /* invalid */ trace_ath5k_tx_complete(ah, skb, txq, ts); - ieee80211_tx_status(ah->hw, skb); + ieee80211_tx_status_skb(ah->hw, skb); } static void diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 672789e3c55d..800177021baf 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -523,7 +523,7 @@ send_mac80211: } /* Send status to mac80211 */ - ieee80211_tx_status(priv->hw, skb); + ieee80211_tx_status_skb(priv->hw, skb); } static inline void ath9k_htc_tx_drainq(struct ath9k_htc_priv *priv, diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 4e939dcac1c9..f15684379b03 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -94,7 +94,7 @@ static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & (IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_STATUS_EOSP)) { - ieee80211_tx_status(hw, skb); + ieee80211_tx_status_skb(hw, skb); return; } diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 9a7c62bd5e43..760d1a28edc6 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1531,9 +1531,9 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, ring->nr_failed_tx_packets++; ring->nr_total_packet_tries += status->frame_count; #endif /* DEBUG */ - ieee80211_tx_status(dev->wl->hw, meta->skb); + ieee80211_tx_status_skb(dev->wl->hw, meta->skb); - /* skb will be freed by ieee80211_tx_status(). + /* skb will be freed by ieee80211_tx_status_skb(). * Poison our pointer. 
*/ meta->skb = B43_DMA_PTR_POISON; } else { diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c index 8c28a9250cd1..0cf70fdb60a6 100644 --- a/drivers/net/wireless/broadcom/b43/pio.c +++ b/drivers/net/wireless/broadcom/b43/pio.c @@ -582,7 +582,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, q->buffer_used -= total_len; q->free_packet_slots += 1; - ieee80211_tx_status(dev->wl->hw, pack->skb); + ieee80211_tx_status_skb(dev->wl->hw, pack->skb); pack->skb = NULL; list_add(&pack->list, &q->packets_list); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c index 60a7b61d59aa..b0322af8e081 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tx.c @@ -1247,7 +1247,7 @@ void iwlagn_rx_reply_tx(struct iwl_priv *priv, struct iwl_rx_cmd_buffer *rxb) while (!skb_queue_empty(&skbs)) { skb = __skb_dequeue(&skbs); - ieee80211_tx_status(priv->hw, skb); + ieee80211_tx_status_skb(priv->hw, skb); } } @@ -1384,6 +1384,6 @@ void iwlagn_rx_reply_compressed_ba(struct iwl_priv *priv, while (!skb_queue_empty(&reclaimed_skbs)) { skb = __skb_dequeue(&reclaimed_skbs); - ieee80211_tx_status(priv->hw, skb); + ieee80211_tx_status_skb(priv->hw, skb); } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index b0f3d51a7613..73c5f1094a75 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1724,7 +1724,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, RS_DRV_DATA_PACK(lq_color, tx_resp->reduced_tpc); if (likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr1))) - ieee80211_tx_status(mvm->hw, skb); + ieee80211_tx_status_skb(mvm->hw, skb); } /* This is an aggregation queue or might become one, so we use @@ -2080,7 +2080,7 @@ out: while (!skb_queue_empty(&reclaimed_skbs)) { skb = __skb_dequeue(&reclaimed_skbs); - ieee80211_tx_status(mvm->hw, skb); + ieee80211_tx_status_skb(mvm->hw, skb); } } diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c index 51d977ffc52f..5aeeac0dd9fe 100644 --- a/drivers/net/wireless/mediatek/mt7601u/tx.c +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c @@ -110,7 +110,7 @@ void mt7601u_tx_status(struct mt7601u_dev *dev, struct sk_buff *skb) info->flags |= IEEE80211_TX_STAT_ACK; spin_lock_bh(&dev->mac_lock); - ieee80211_tx_status(dev->hw, skb); + ieee80211_tx_status_skb(dev->hw, skb); spin_unlock_bh(&dev->mac_lock); } diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 9a9cfd0ce402..c88ce446e117 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -533,7 +533,7 @@ void rt2x00lib_txdone(struct queue_entry *entry, */ if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) { if (rt2x00_has_cap_flag(rt2x00dev, REQUIRE_TASKLET_CONTEXT)) - ieee80211_tx_status(rt2x00dev->hw, entry->skb); + ieee80211_tx_status_skb(rt2x00dev->hw, entry->skb); else ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb); } else { diff --git a/drivers/net/wireless/st/cw1200/txrx.c b/drivers/net/wireless/st/cw1200/txrx.c index e16e9ae90d20..084d52b11f5b 100644 --- a/drivers/net/wireless/st/cw1200/txrx.c +++ b/drivers/net/wireless/st/cw1200/txrx.c @@ -994,7 +994,7 @@ void cw1200_skb_dtor(struct cw1200_common *priv, txpriv->raw_link_id, txpriv->tid); tx_policy_put(priv, txpriv->rate_id); } - 
ieee80211_tx_status(priv->hw, skb); + ieee80211_tx_status_skb(priv->hw, skb); } void cw1200_rx_cb(struct cw1200_common *priv, diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index e9dc3c72bb11..474b603c121c 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -434,7 +434,7 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, result->status, wl1251_tx_parse_status(result->status)); - ieee80211_tx_status(wl->hw, skb); + ieee80211_tx_status_skb(wl->hw, skb); wl->tx_frames[result->id] = NULL; } @@ -566,7 +566,7 @@ void wl1251_tx_flush(struct wl1251 *wl) if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) continue; - ieee80211_tx_status(wl->hw, skb); + ieee80211_tx_status_skb(wl->hw, skb); } for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++) @@ -577,7 +577,7 @@ void wl1251_tx_flush(struct wl1251 *wl) if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) continue; - ieee80211_tx_status(wl->hw, skb); + ieee80211_tx_status_skb(wl->hw, skb); wl->tx_frames[i] = NULL; } } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 511d5d1c042f..580781ff9dcf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4911,7 +4911,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled and RCU read lock * @@ -4936,7 +4936,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled. * @@ -4961,7 +4961,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * In process context use instead ieee80211_rx_ni(). * @@ -4981,7 +4981,7 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) * * Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not * be mixed for a single hardware.Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -4996,7 +4996,7 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); * * Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may * not be mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). 
* * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -5172,7 +5172,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, struct ieee80211_tx_info *info); /** - * ieee80211_tx_status - transmit status callback + * ieee80211_tx_status_skb - transmit status callback * * Call this function for all transmitted frames after they have been * transmitted. It is permissible to not call this function for @@ -5187,13 +5187,13 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call */ -void ieee80211_tx_status(struct ieee80211_hw *hw, - struct sk_buff *skb); +void ieee80211_tx_status_skb(struct ieee80211_hw *hw, + struct sk_buff *skb); /** * ieee80211_tx_status_ext - extended transmit status callback * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that may want to provide extra information that does not * fit into &struct ieee80211_tx_info. * @@ -5210,7 +5210,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /** * ieee80211_tx_status_noskb - transmit status callback without skb * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that cannot reliably map tx status information back to * specific skbs. * @@ -5238,9 +5238,9 @@ static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, /** * ieee80211_tx_status_ni - transmit status callback (in process context) * - * Like ieee80211_tx_status() but can be called in process context. + * Like ieee80211_tx_status_skb() but can be called in process context. * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_irqsafe() may not be mixed * for a single hardware. * @@ -5251,17 +5251,17 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, struct sk_buff *skb) { local_bh_disable(); - ieee80211_tx_status(hw, skb); + ieee80211_tx_status_skb(hw, skb); local_bh_enable(); } /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * - * Like ieee80211_tx_status() but can be called in IRQ context + * Like ieee80211_tx_status_skb() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_ni() may not be mixed for a single hardware. 
* * @hw: the hardware the frame was transmitted by diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b46f4d733c5d..033a5261ac3a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -319,7 +319,7 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t) break; case IEEE80211_TX_STATUS_MSG: skb->pkt_type = 0; - ieee80211_tx_status(&local->hw, skb); + ieee80211_tx_status_skb(&local->hw, skb); break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 807cdab38d5e..1708b33cdc5e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1092,7 +1092,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, send_to_cooked, status); } -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +void ieee80211_tx_status_skb(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); @@ -1111,7 +1111,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) ieee80211_tx_status_ext(hw, &status); rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_tx_status); +EXPORT_SYMBOL(ieee80211_tx_status_skb); void ieee80211_tx_status_ext(struct ieee80211_hw *hw, struct ieee80211_tx_status *status) -- cgit v1.2.3 From 8e4687f6061ec00d16b06cb2ed6d2593cae19c46 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 12 Oct 2023 14:42:29 +0300 Subject: wifi: mac80211: rename struct cfg80211_rx_assoc_resp to cfg80211_rx_assoc_resp_data make htmldocs warns: Documentation/driver-api/80211/cfg80211:48: ./include/net/cfg80211.h:7290: WARNING: Duplicate C declaration, also defined at cfg80211:7251. Declaration is '.. c:function:: void cfg80211_rx_assoc_resp (struct net_device *dev, struct cfg80211_rx_assoc_resp *data)'. This is because there's a function named cfg80211_rx_assoc_resp() and a struct named cfg80211_rx_assoc_resp, see previous patch for more info. To workaround this rename the struct to cfg80211_rx_assoc_resp_data. The parameter for the function is named 'data' anyway so the naming here is consistent. Compile tested only. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231012114229.2931808-3-kvalo@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- net/mac80211/mlme.c | 2 +- net/wireless/mlme.c | 2 +- net/wireless/nl80211.c | 2 +- net/wireless/nl80211.h | 2 +- net/wireless/trace.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ff4ab88393c3..0e7abb083a77 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7269,7 +7269,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); /** - * struct cfg80211_rx_assoc_resp - association response data + * struct cfg80211_rx_assoc_resp_data - association response data * @bss: the BSS that association was requested with, ownership of the pointer * moves to cfg80211 in the call to cfg80211_rx_assoc_resp() * @buf: (Re)Association Response frame (header + body) @@ -7284,7 +7284,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @links.status: Set this (along with a BSS pointer) for links that * were rejected by the AP. 
*/ -struct cfg80211_rx_assoc_resp { +struct cfg80211_rx_assoc_resp_data { const u8 *buf; size_t len; const u8 *req_ies; @@ -7301,7 +7301,7 @@ struct cfg80211_rx_assoc_resp { /** * cfg80211_rx_assoc_resp - notification of processed association response * @dev: network device - * @data: association response data, &struct cfg80211_rx_assoc_resp + * @data: association response data, &struct cfg80211_rx_assoc_resp_data * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -7309,7 +7309,7 @@ struct cfg80211_rx_assoc_resp { * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp *data); + struct cfg80211_rx_assoc_resp_data *data); /** * struct cfg80211_assoc_failure - association failure data diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 54a9f6db799e..887b496f2b81 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5281,7 +5281,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, .u.mlme.data = ASSOC_EVENT, }; struct ieee80211_prep_tx_info info = {}; - struct cfg80211_rx_assoc_resp resp = { + struct cfg80211_rx_assoc_resp_data resp = { .uapsd_queues = -1, }; u8 ap_mld_addr[ETH_ALEN] __aligned(2); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 579fea2f3548..f90f58c65688 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -22,7 +22,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp *data) + struct cfg80211_rx_assoc_resp_data *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2650543dcebe..d468e015d828 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -17799,7 +17799,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data) + struct cfg80211_rx_assoc_resp_data *data) { nl80211_send_mlme_event(rdev, netdev, data->buf, data->len, NL80211_CMD_ASSOCIATE, GFP_KERNEL, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b4af53f9b227..3edd53c38912 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -60,7 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data); + struct cfg80211_rx_assoc_resp_data *data); void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f6667bf3fd12..30cd1bd58aac 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2928,7 +2928,7 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth, TRACE_EVENT(cfg80211_send_rx_assoc, TP_PROTO(struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data), + struct cfg80211_rx_assoc_resp_data *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY -- cgit v1.2.3 From e5dfb9416b6eecb19a3ee0277b0432aa0f9b9f7c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Oct 2023 09:38:28 +0200 Subject: wifi: mac80211: fix another key installation error path Due to overlapping changes and merges, another error path 
ended up broken. Fix this one as well. Reported-by: Jakub Kicinski Signed-off-by: Johannes Berg --- net/mac80211/key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e0ff3a753e15..af74d7f9d94d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -881,8 +881,10 @@ int ieee80211_key_link(struct ieee80211_key *key, if (link_id >= 0) { link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sta->local->hw.wiphy->mtx)); - if (!link_sta) - return -ENOLINK; + if (!link_sta) { + ret = -ENOLINK; + goto out; + } } old_key = wiphy_dereference(sdata->local->hw.wiphy, -- cgit v1.2.3 From e4e7e3af73694380f0d9a742d13b80598a3393e9 Mon Sep 17 00:00:00 2001 From: Vinayak Yadawad Date: Tue, 26 Sep 2023 16:02:22 +0530 Subject: wifi: cfg80211: Allow AP/P2PGO to indicate port authorization to peer STA/P2PClient In 4way handshake offload, cfg80211_port_authorized enables driver to indicate successful 4way handshake to cfg80211 layer. Currently this path of port authorization is restricted to interface type NL80211_IFTYPE_STATION and NL80211_IFTYPE_P2P_CLIENT. This patch extends the support for NL80211_IFTYPE_AP and NL80211_IFTYPE_P2P_GO interfaces to authorize peer STA/P2P_CLIENT, whenever authentication is offloaded on the AP/P2P_GO interface. Signed-off-by: Vinayak Yadawad Link: https://lore.kernel.org/r/dee3b0a2b4f617e932c90bff4504a89389273632.1695721435.git.vinayak.yadawad@broadcom.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++-- net/wireless/core.h | 4 ++-- net/wireless/nl80211.c | 4 ++-- net/wireless/nl80211.h | 5 ++++- net/wireless/sme.c | 23 ++++++++++++++--------- net/wireless/util.c | 2 +- 6 files changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0e7abb083a77..b137a33a1b68 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8028,7 +8028,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * cfg80211_port_authorized - notify cfg80211 of successful security association * * @dev: network device - * @bssid: the BSSID of the AP + * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address + * in case of AP/P2P GO * @td_bitmap: transition disable policy * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags @@ -8039,8 +8040,11 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * should be preceded with a call to cfg80211_connect_result(), * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to * indicate the 802.11 association. + * This function can also be called by AP/P2P GO driver that supports + * authentication offload. In this case the peer_mac passed is that of + * associated STA/GC. 
*/ -void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, +void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** diff --git a/net/wireless/core.h b/net/wireless/core.h index 79b1c6d17847..4c692c7faf30 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -270,7 +270,7 @@ struct cfg80211_event { struct ieee80211_channel *channel; } ij; struct { - u8 bssid[ETH_ALEN]; + u8 peer_addr[ETH_ALEN]; const u8 *td_bitmap; u8 td_bitmap_len; } pa; @@ -404,7 +404,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d468e015d828..56fd7cf2563b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18124,7 +18124,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, } void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, + struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { struct sk_buff *msg; @@ -18142,7 +18142,7 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer_addr)) goto nla_put_failure; if ((td_bitmap_len > 0) && td_bitmap) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 3edd53c38912..aad40240d9cb 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -82,8 +82,11 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); +/* For STA/GC, indicate port authorized with AP/GO bssid. + * For GO/AP, use peer GC/STA mac_addr. 
+ */ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, + struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 50fcb27e6dab..acfe66da7109 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1281,24 +1281,29 @@ out: } EXPORT_SYMBOL(cfg80211_roamed); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && - wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT && + wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) return; - if (WARN_ON(!wdev->connected) || - WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid))) - return; + if (wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { + if (WARN_ON(!wdev->connected) || + WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, peer_addr))) + return; + } nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, - bssid, td_bitmap, td_bitmap_len); + peer_addr, td_bitmap, td_bitmap_len); } -void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, +void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1306,7 +1311,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, struct cfg80211_event *ev; unsigned long flags; - if (WARN_ON(!bssid)) + if (WARN_ON(!peer_addr)) return; ev = kzalloc(sizeof(*ev) + td_bitmap_len, gfp); @@ -1314,7 +1319,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, return; ev->type = EVENT_PORT_AUTHORIZED; - memcpy(ev->pa.bssid, bssid, ETH_ALEN); + memcpy(ev->pa.peer_addr, peer_addr, ETH_ALEN); ev->pa.td_bitmap = ((u8 *)ev) + sizeof(*ev); ev->pa.td_bitmap_len = td_bitmap_len; memcpy((void *)ev->pa.td_bitmap, td_bitmap, td_bitmap_len); diff --git a/net/wireless/util.c b/net/wireless/util.c index 0893b7f57832..626b858b4b35 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1062,7 +1062,7 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; case EVENT_PORT_AUTHORIZED: - __cfg80211_port_authorized(wdev, ev->pa.bssid, + __cfg80211_port_authorized(wdev, ev->pa.peer_addr, ev->pa.td_bitmap, ev->pa.td_bitmap_len); break; -- cgit v1.2.3 From f3bd5932780091e214959ffdb1d91032ea4744be Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 1 Oct 2023 13:01:09 +0300 Subject: wifi: mac80211: drop robust action frames before assoc To be able to more easily understand the code, drop robust action frames before being associated, even if there's no MFP in the end, as they are Class 3 Frames and shouldn't be transmitted in the first place. 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231001125722.b2fd37083371.Ie9f4906e2f6c698989bce6681956ed2f9454f27c@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 051db97a92b4..6f679d2c0409 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2474,6 +2474,15 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_U_UNPROT_UNICAST_PUB_ACTION; } + /* + * Drop robust action frames before assoc regardless of MFP state, + * after assoc we also have decided on MFP or not. + */ + if (ieee80211_is_action(fc) && + ieee80211_is_robust_mgmt_frame(rx->skb) && + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC))) + return RX_DROP_U_UNPROT_ROBUST_ACTION; + return RX_CONTINUE; } -- cgit v1.2.3 From b7c4f5730a9fa258c8e79f6387a03f3a95c681a2 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 20 Oct 2023 16:00:55 +0200 Subject: tls: don't reset prot->aad_size and prot->tail_size for TLS_HW Prior to commit 1a074f7618e8 ("tls: also use init_prot_info in tls_set_device_offload"), setting TLS_HW on TX didn't touch prot->aad_size and prot->tail_size. They are set to 0 during context allocation (tls_prot_info is embedded in tls_context, kzalloc'd by tls_ctx_create). When the RX key is configured, tls_set_sw_offload is called (for both TLS_SW and TLS_HW). If the TX key is configured in TLS_HW mode after the RX key has been installed, init_prot_info will now overwrite the correct values of aad_size and tail_size, breaking SW decryption and causing -EBADMSG errors to be returned to userspace. Since TLS_HW doesn't use aad_size and tail_size at all (for TLS1.2, tail_size is always 0, and aad_size is equal to TLS_HEADER_SIZE + rec_seq_size), we can simply drop this hunk. 
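As a back-of-the-envelope check (AES-GCM-128 numbers, shown purely as an illustration and not taken from this patch):

        /* TLS 1.2: the AAD covers the record sequence number plus the
         * record header, so for AES-GCM-128:
         */
        aad_size  = rec_seq_size + TLS_HEADER_SIZE;  /* 8 + 5 = 13 bytes */
        tail_size = 0;                               /* TLS 1.2 has no trailing content-type byte */

so removing the TLS_HW special case cannot break the offload path, which does not consume these fields anyway.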
Fixes: 1a074f7618e8 ("tls: also use init_prot_info in tls_set_device_offload") Signed-off-by: Sabrina Dubroca Acked-by: Jakub Kicinski Tested-by: Ran Rozenstein Link: https://lore.kernel.org/r/979d2f89a6a994d5bb49cae49a80be54150d094d.1697653889.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls.h | 3 +-- net/tls/tls_device.c | 2 +- net/tls/tls_sw.c | 10 ++-------- 3 files changed, 4 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/tls/tls.h b/net/tls/tls.h index 478b2c0060aa..762f424ff2d5 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -144,8 +144,7 @@ void tls_err_abort(struct sock *sk, int err); int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, - const struct tls_cipher_desc *cipher_desc, - int mode); + const struct tls_cipher_desc *cipher_desc); int tls_set_sw_offload(struct sock *sk, int tx); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f01543557a60..bf8ed36b1ad6 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1099,7 +1099,7 @@ int tls_set_device_offload(struct sock *sk) goto release_netdev; } - rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_HW); + rc = init_prot_info(prot, crypto_info, cipher_desc); if (rc) goto release_netdev; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b5905e60d792..a78e8e722409 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2629,8 +2629,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, - const struct tls_cipher_desc *cipher_desc, - int mode) + const struct tls_cipher_desc *cipher_desc) { u16 nonce_size = cipher_desc->nonce; @@ -2643,11 +2642,6 @@ int init_prot_info(struct tls_prot_info *prot, prot->tail_size = 0; } - if (mode == TLS_HW) { - prot->aad_size = 0; - prot->tail_size = 0; - } - /* Sanity-check the sizes for stack allocations. */ if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) return -EINVAL; @@ -2707,7 +2701,7 @@ int tls_set_sw_offload(struct sock *sk, int tx) goto free_priv; } - rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_SW); + rc = init_prot_info(prot, crypto_info, cipher_desc); if (rc) goto free_priv; -- cgit v1.2.3 From a254b90c9aac3d3d938a07e019773e35a977451b Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Wed, 6 Sep 2023 16:59:54 +0300 Subject: Bluetooth: ISO: Fix BIS cleanup This fixes the master BIS cleanup procedure - as opposed to CIS cleanup, no HCI disconnect command should be issued. A master BIS should only be terminated by disabling periodic and extended advertising, and terminating the BIG. In case of a Broadcast Receiver, all BIS and PA connections can be cleaned up by calling hci_conn_failed, since it contains all function calls that are necessary for successful cleanup. 
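Put differently, the master BIS teardown in terminate_big_sync() below reduces to the following call sequence (sketch only, no new code):

        hci_disable_per_advertising_sync(hdev, d->bis);         /* stop periodic advertising */
        hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);   /* drop the extended adv set */
        /* ... then terminate the BIG, but only if it was actually created */

with no HCI Disconnect ever being issued for the BIS.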
Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 ++ net/bluetooth/hci_conn.c | 7 +++++++ net/bluetooth/hci_sync.c | 28 ++++++++++++---------------- 3 files changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 57eeb07aeb25..6efbc2152146 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -80,6 +80,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); +int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); + int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 73470cc3518a..973c9de01a04 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -759,6 +759,7 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data) bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis); + hci_disable_per_advertising_sync(hdev, d->bis); hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL); /* Only terminate BIG if it has been created */ @@ -1247,6 +1248,12 @@ void hci_conn_failed(struct hci_conn *conn, u8 status) break; } + /* In case of BIG/PA sync failed, clear conn flags so that + * the conns will be correctly cleaned up by ISO layer + */ + test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags); + test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags); + conn->state = BT_CLOSED; hci_connect_cfm(conn, status); hci_conn_del(conn); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a15ab0b874a9..3485602518fe 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1312,7 +1312,7 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance) return hci_enable_ext_advertising_sync(hdev, instance); } -static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) +int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_per_adv_enable cp; struct adv_info *adv = NULL; @@ -5232,6 +5232,17 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, if (conn->type == AMP_LINK) return hci_disconnect_phy_link_sync(hdev, conn->handle, reason); + if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { + /* This is a BIS connection, hci_conn_del will + * do the necessary cleanup. 
+ */ + hci_dev_lock(hdev); + hci_conn_failed(conn, reason); + hci_dev_unlock(hdev); + + return 0; + } + memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; @@ -5384,21 +5395,6 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) err = hci_reject_conn_sync(hdev, conn, reason); break; case BT_OPEN: - hci_dev_lock(hdev); - - /* Cleanup bis or pa sync connections */ - if (test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags) || - test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags)) { - hci_conn_failed(conn, reason); - } else if (test_bit(HCI_CONN_PA_SYNC, &conn->flags) || - test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) { - conn->state = BT_CLOSED; - hci_disconn_cfm(conn, reason); - hci_conn_del(conn); - } - - hci_dev_unlock(hdev); - return 0; case BT_BOUND: break; default: -- cgit v1.2.3 From 1d11d70d1f6b23e7d3fc00396c17b90b876162a4 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Wed, 6 Sep 2023 17:01:03 +0300 Subject: Bluetooth: ISO: Pass BIG encryption info through QoS This enables a broadcast sink to be informed if the PA it has synced with is associated with an encrypted BIG, by retrieving the socket QoS and checking the encryption field. After PA sync has been successfully established and the first BIGInfo advertising report is received, a new hcon is added and notified to the ISO layer. The ISO layer sets the encryption field of the socket and hcon QoS according to the encryption parameter of the BIGInfo advertising report event. After that, the userspace is woken up, and the QoS of the new PA sync socket can be read, to inspect the encryption field and follow up accordingly. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 3 +++ include/net/bluetooth/hci_core.h | 25 ++++++++++++++++++- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 54 +++++++++++++++++++++++++++++----------- net/bluetooth/iso.c | 19 ++++++++++---- 5 files changed, 82 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 87d92accc26e..bdee5d649cc6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1,6 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright 2023 NXP Written 2000,2001 by Maxim Krasnyansky @@ -673,6 +674,8 @@ enum { #define HCI_TX_POWER_INVALID 127 #define HCI_RSSI_INVALID 127 +#define HCI_SYNC_HANDLE_INVALID 0xffff + #define HCI_ROLE_MASTER 0x00 #define HCI_ROLE_SLAVE 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c33348ba1657..f36c1fd5d64e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1314,7 +1314,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev * } static inline struct hci_conn * -hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) +hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1336,6 +1336,29 @@ hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) return NULL; } +static inline struct hci_conn * +hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK || + !test_bit(HCI_CONN_PA_SYNC, 
&c->flags)) + continue; + + if (c->sync_handle == sync_handle) { + rcu_read_unlock(); + return c; + } + } + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 973c9de01a04..271da46e7428 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -974,6 +974,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, conn->rssi = HCI_RSSI_INVALID; conn->tx_power = HCI_TX_POWER_INVALID; conn->max_tx_power = HCI_TX_POWER_INVALID; + conn->sync_handle = HCI_SYNC_HANDLE_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1e1c9147356c..9b34c9f8ee02 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6603,7 +6603,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, struct hci_ev_le_pa_sync_established *ev = data; int mask = hdev->link_mode; __u8 flags = 0; - struct hci_conn *bis; + struct hci_conn *pa_sync; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6620,20 +6620,19 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, if (!(flags & HCI_PROTO_DEFER)) goto unlock; - /* Add connection to indicate the PA sync event */ - bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + if (ev->status) { + /* Add connection to indicate the failed PA sync event */ + pa_sync = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); - if (!bis) - goto unlock; + if (!pa_sync) + goto unlock; - if (ev->status) - set_bit(HCI_CONN_PA_SYNC_FAILED, &bis->flags); - else - set_bit(HCI_CONN_PA_SYNC, &bis->flags); + set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags); - /* Notify connection to iso layer */ - hci_connect_cfm(bis, ev->status); + /* Notify iso layer */ + hci_connect_cfm(pa_sync, ev->status); + } unlock: hci_dev_unlock(hdev); @@ -7125,7 +7124,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); if (!ev->status) { - pa_sync = hci_conn_hash_lookup_pa_sync(hdev, ev->handle); + pa_sync = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); if (pa_sync) /* Also mark the BIG sync established event on the * associated PA sync hcon @@ -7186,15 +7185,42 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, struct hci_evt_le_big_info_adv_report *ev = data; int mask = hdev->link_mode; __u8 flags = 0; + struct hci_conn *pa_sync; bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle)); hci_dev_lock(hdev); mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); - if (!(mask & HCI_LM_ACCEPT)) + if (!(mask & HCI_LM_ACCEPT)) { hci_le_pa_term_sync(hdev, ev->sync_handle); + goto unlock; + } + if (!(flags & HCI_PROTO_DEFER)) + goto unlock; + + pa_sync = hci_conn_hash_lookup_pa_sync_handle + (hdev, + le16_to_cpu(ev->sync_handle)); + + if (pa_sync) + goto unlock; + + /* Add connection to indicate the PA sync event */ + pa_sync = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); + + if (!pa_sync) + goto unlock; + + pa_sync->sync_handle = le16_to_cpu(ev->sync_handle); + set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags); + + /* Notify iso layer */ + hci_connect_cfm(pa_sync, 0x00); + +unlock: hci_dev_unlock(hdev); } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 71248163ce9a..2132a16be93c 100644 --- 
a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -77,6 +77,7 @@ static struct bt_iso_qos default_qos; static bool check_ucast_qos(struct bt_iso_qos *qos); static bool check_bcast_qos(struct bt_iso_qos *qos); static bool iso_match_sid(struct sock *sk, void *data); +static bool iso_match_sync_handle(struct sock *sk, void *data); static void iso_sock_disconn(struct sock *sk); /* ---- ISO timers ---- */ @@ -1202,7 +1203,6 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) { iso_conn_big_sync(sk); sk->sk_state = BT_LISTEN; - set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags); } else { iso_conn_defer_accept(pi->conn->hcon); sk->sk_state = BT_CONFIG; @@ -1579,6 +1579,7 @@ static void iso_conn_ready(struct iso_conn *conn) struct sock *sk = conn->sk; struct hci_ev_le_big_sync_estabilished *ev = NULL; struct hci_ev_le_pa_sync_established *ev2 = NULL; + struct hci_evt_le_big_info_adv_report *ev3 = NULL; struct hci_conn *hcon; BT_DBG("conn %p", conn); @@ -1603,14 +1604,20 @@ static void iso_conn_ready(struct iso_conn *conn) parent = iso_get_sock_listen(&hcon->src, &hcon->dst, iso_match_big, ev); - } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags) || - test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) { + } else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) { ev2 = hci_recv_event_data(hcon->hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); if (ev2) parent = iso_get_sock_listen(&hcon->src, &hcon->dst, iso_match_sid, ev2); + } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { + ev3 = hci_recv_event_data(hcon->hdev, + HCI_EVT_LE_BIG_INFO_ADV_REPORT); + if (ev3) + parent = iso_get_sock_listen(&hcon->src, + &hcon->dst, + iso_match_sync_handle, ev3); } if (!parent) @@ -1650,11 +1657,13 @@ static void iso_conn_ready(struct iso_conn *conn) hcon->sync_handle = iso_pi(parent)->sync_handle; } - if (ev2 && !ev2->status) { - iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle; + if (ev3) { iso_pi(sk)->qos = iso_pi(parent)->qos; + iso_pi(sk)->qos.bcast.encryption = ev3->encryption; + hcon->iso_qos = iso_pi(sk)->qos; iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis; memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS); + set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags); } bacpy(&iso_pi(sk)->dst, &hcon->dst); -- cgit v1.2.3 From 5af69ab9bc623bd9293c5a931fb0827ed5201b5c Mon Sep 17 00:00:00 2001 From: Vlad Pruteanu Date: Tue, 12 Sep 2023 09:33:29 +0300 Subject: Bluetooth: ISO: Set CIS bit only for devices with CIS support Currently the CIS bit that can be set by the host is set for any device that has CIS or BIS support. In reality, devices that support BIS may not allow that bit to be set and so, the HCI bring up fails for them. This commit fixes this by only setting the bit for CIS capable devices. 
Signed-off-by: Vlad Pruteanu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3485602518fe..c6f57af88bfd 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4264,12 +4264,12 @@ static int hci_le_set_host_feature_sync(struct hci_dev *hdev) { struct hci_cp_le_set_host_feature cp; - if (!iso_capable(hdev)) + if (!cis_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); - /* Isochronous Channels (Host Support) */ + /* Connected Isochronous Channels (Host Support) */ cp.bit_number = 32; cp.bit_value = 1; -- cgit v1.2.3 From 31ca583b38e55007d49ecc81722d30b6395fec30 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Tue, 3 Oct 2023 17:49:33 +0300 Subject: Bluetooth: ISO: Allow binding a bcast listener to 0 bises This makes it possible to bind a broadcast listener to a broadcaster address without asking for any BIS indexes to sync with. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/iso.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 271da46e7428..e9c56fcd99c4 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2147,7 +2147,7 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, } pdu; int err; - if (num_bis > sizeof(pdu.bis)) + if (num_bis < 0x01 || num_bis > sizeof(pdu.bis)) return -EINVAL; err = qos_set_big(hdev, qos); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 2132a16be93c..8ab7ea5ebedf 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -790,8 +790,7 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid, sa->iso_bc->bc_num_bis); - if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc) || - sa->iso_bc->bc_num_bis < 0x01 || sa->iso_bc->bc_num_bis > 0x1f) + if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc)) return -EINVAL; bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr); -- cgit v1.2.3 From 71b7bb48b9837ca97c75a521cc68398641dcb1d6 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Tue, 3 Oct 2023 17:37:39 +0300 Subject: Bluetooth: ISO: Match QoS adv handle with BIG handle In case the user binds multiple sockets for the same BIG, the BIG handle should be matched with the associated adv handle, if it has already been allocated previously. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e9c56fcd99c4..974631e652c1 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1494,6 +1494,18 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) /* Allocate BIS if not set */ if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) { + if (qos->bcast.big != BT_ISO_QOS_BIG_UNSET) { + conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big); + + if (conn) { + /* If the BIG handle is already matched to an advertising + * handle, do not allocate a new one. + */ + qos->bcast.bis = conn->iso_qos.bcast.bis; + return 0; + } + } + /* Find an unused adv set to advertise BIS, skip instance 0x00 * since it is reserved as general purpose set. 
*/ -- cgit v1.2.3 From 41c56aa94c647a0f84c5c33fffb3f283e6f0e5bf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2023 14:19:23 +0300 Subject: Bluetooth: msft: __hci_cmd_sync() doesn't return NULL The __hci_cmd_sync() function doesn't return NULL. Checking for NULL doesn't make the code safer, it just confuses people. When a function returns both error pointers and NULL then generally the NULL is a kind of success case. For example, maybe we look up an item then errors mean we ran out of memory but NULL means the item is not found. Or if we request a feature, then error pointers mean that there was an error but NULL means that the feature has been deliberately turned off. In this code it's different. The NULL is handled as if there is a bug in __hci_cmd_sync() where it accidentally returns NULL instead of a proper error code. This was done consistently until commit 9e14606d8f38 ("Bluetooth: msft: Extended monitor tracking by address filter") which deleted the work around for the potential future bug and treated NULL as success. Predicting potential future bugs is complicated, but we should just fix them instead of working around them. Instead of debating whether NULL is failure or success, let's just say it's currently impossible and delete the dead code. Signed-off-by: Dan Carpenter Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/msft.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index abbafa6194ca..630e3023273b 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -150,10 +150,7 @@ static bool read_supported_features(struct hci_dev *hdev, skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { - if (!skb) - skb = ERR_PTR(-EIO); - + if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read MSFT supported features (%ld)", PTR_ERR(skb)); return false; @@ -353,7 +350,7 @@ static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle) skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { kfree(address_filter); continue; } @@ -442,11 +439,8 @@ static int msft_remove_monitor_sync(struct hci_dev *hdev, skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { - if (!skb) - return -EIO; + if (IS_ERR(skb)) return PTR_ERR(skb); - } return msft_le_cancel_monitor_advertisement_cb(hdev, hdev->msft_opcode, monitor, skb); @@ -559,7 +553,7 @@ static int msft_add_monitor_sync(struct hci_dev *hdev, skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { err = PTR_ERR(skb); goto out_free; } @@ -740,10 +734,10 @@ static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data) skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter", &address_filter->bdaddr); - err = -EIO; + err = PTR_ERR(skb); goto done; } kfree_skb(skb); @@ -893,7 +887,7 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data) skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp, HCI_CMD_TIMEOUT); - if (IS_ERR_OR_NULL(skb)) { + if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to enable address %pMR filter", &address_filter->bdaddr); skb = NULL; -- cgit v1.2.3 From 
fcb89f120376c054005e6d7a82bb89f95b3b04ef Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Wed, 11 Oct 2023 17:24:07 +0300 Subject: Bluetooth: ISO: Fix bcast listener cleanup This fixes the cleanup callback for slave bis and pa sync hcons. Closing all bis hcons will trigger BIG Terminate Sync, while closing all bises and the pa sync hcon will also trigger PA Terminate Sync. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 43 +++++++++++++++++++--------------------- net/bluetooth/hci_conn.c | 38 +++++++++++++++++++++++++++-------- net/bluetooth/hci_event.c | 10 ---------- 3 files changed, 50 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f36c1fd5d64e..99865c23e461 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1290,29 +1290,6 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev, - __u8 handle) -{ - struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *c; - - rcu_read_lock(); - - list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) - continue; - - if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) { - rcu_read_unlock(); - return c; - } - } - - rcu_read_unlock(); - - return NULL; -} - static inline struct hci_conn * hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { @@ -1400,6 +1377,26 @@ static inline void hci_conn_hash_list_state(struct hci_dev *hdev, rcu_read_unlock(); } +static inline void hci_conn_hash_list_flag(struct hci_dev *hdev, + hci_conn_func_t func, __u8 type, + __u8 flag, void *data) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + if (!func) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && test_bit(flag, &c->flags)) + func(c, data); + } + + rcu_read_unlock(); +} + static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 974631e652c1..7727fe30e5c3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -815,6 +815,17 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data) return 0; } +static void find_bis(struct hci_conn *conn, void *data) +{ + struct iso_list_data *d = data; + + /* Ignore if BIG doesn't match */ + if (d->big != conn->iso_qos.bcast.big) + return; + + d->count++; +} + static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) { struct iso_list_data *d; @@ -826,10 +837,27 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c if (!d) return -ENOMEM; + memset(d, 0, sizeof(*d)); d->big = big; d->sync_handle = conn->sync_handle; - d->pa_sync_term = test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags); - d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags); + + if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { + hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + HCI_CONN_PA_SYNC, d); + + if (!d->count) + d->pa_sync_term = true; + + d->count = 0; + } + + if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) { + hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + HCI_CONN_BIG_SYNC, d); + + if (!d->count) + d->big_sync_term = true; + } ret = 
hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); @@ -865,12 +893,6 @@ static void bis_cleanup(struct hci_conn *conn) hci_le_terminate_big(hdev, conn); } else { - bis = hci_conn_hash_lookup_big_any_dst(hdev, - conn->iso_qos.bcast.big); - - if (bis) - return; - hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, conn); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 9b34c9f8ee02..32fb2f102a12 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7112,7 +7112,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, { struct hci_evt_le_big_sync_estabilished *ev = data; struct hci_conn *bis; - struct hci_conn *pa_sync; int i; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -7123,15 +7122,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - if (!ev->status) { - pa_sync = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); - if (pa_sync) - /* Also mark the BIG sync established event on the - * associated PA sync hcon - */ - set_bit(HCI_CONN_BIG_SYNC, &pa_sync->flags); - } - for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]); __le32 interval; -- cgit v1.2.3 From 181a42edddf51d5d9697ecdf365d72ebeab5afb0 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 11 Oct 2023 17:57:31 +0800 Subject: Bluetooth: Make handle of hci_conn be unique The handle of new hci_conn is always HCI_CONN_HANDLE_MAX + 1 if the handle of the first hci_conn entry in hci_dev->conn_hash->list is not HCI_CONN_HANDLE_MAX + 1. Use ida to manage the allocation of hci_conn->handle to make it be unique. Fixes: 9f78191cc9f1 ("Bluetooth: hci_conn: Always allocate unique handles") Signed-off-by: Ziyang Xuan Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 6 ++++- net/bluetooth/amp.c | 3 +-- net/bluetooth/hci_conn.c | 57 ++++++++++++++++++++++------------------ net/bluetooth/hci_core.c | 3 +++ net/bluetooth/hci_event.c | 38 +++++++++++---------------- 5 files changed, 56 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 99865c23e461..20988623c5cc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -350,6 +350,8 @@ struct hci_dev { struct list_head list; struct mutex lock; + struct ida unset_handle_ida; + const char *name; unsigned long flags; __u16 id; @@ -1446,7 +1448,9 @@ int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role); + u8 role, u16 handle); +struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 2134f92bd7ac..5d698f19868c 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -109,7 +109,7 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, struct hci_conn *hcon; u8 role = out ? 
HCI_ROLE_MASTER : HCI_ROLE_SLAVE; - hcon = hci_conn_add(hdev, AMP_LINK, dst, role); + hcon = hci_conn_add(hdev, AMP_LINK, dst, role, __next_handle(mgr)); if (!hcon) return NULL; @@ -117,7 +117,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, hcon->state = BT_CONNECT; hcon->attempt++; - hcon->handle = __next_handle(mgr); hcon->remote_id = remote_id; hcon->amp_mgr = amp_mgr_get(mgr); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7727fe30e5c3..3735764b3169 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_conn_hash_del(hdev, conn); + if (HCI_CONN_HANDLE_UNSET(conn->handle)) + ida_free(&hdev->unset_handle_ida, conn->handle); + if (conn->cleanup) conn->cleanup(conn); @@ -951,31 +954,18 @@ static void cis_cleanup(struct hci_conn *conn) hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig); } -static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev) +static int hci_conn_hash_alloc_unset(struct hci_dev *hdev) { - struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *c; - u16 handle = HCI_CONN_HANDLE_MAX + 1; - - rcu_read_lock(); - - list_for_each_entry_rcu(c, &h->list, list) { - /* Find the first unused handle */ - if (handle == 0xffff || c->handle != handle) - break; - handle++; - } - rcu_read_unlock(); - - return handle; + return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1, + U16_MAX, GFP_ATOMIC); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role) + u8 role, u16 handle) { struct hci_conn *conn; - BT_DBG("%s dst %pMR", hdev->name, dst); + bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle); conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) @@ -983,7 +973,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, bacpy(&conn->dst, dst); bacpy(&conn->src, &hdev->bdaddr); - conn->handle = hci_conn_hash_alloc_unset(hdev); + conn->handle = handle; conn->hdev = hdev; conn->type = type; conn->role = role; @@ -1068,6 +1058,20 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, return conn; } +struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 role) +{ + int handle; + + bt_dev_dbg(hdev, "dst %pMR", dst); + + handle = hci_conn_hash_alloc_unset(hdev); + if (unlikely(handle < 0)) + return NULL; + + return hci_conn_add(hdev, type, dst, role, handle); +} + static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) { if (!reason) @@ -1304,6 +1308,9 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) if (conn->abort_reason) return conn->abort_reason; + if (HCI_CONN_HANDLE_UNSET(conn->handle)) + ida_free(&hdev->unset_handle_ida, conn->handle); + conn->handle = handle; return 0; @@ -1411,7 +1418,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, if (conn) { bacpy(&conn->dst, dst); } else { - conn = hci_conn_add(hdev, LE_LINK, dst, role); + conn = hci_conn_add_unset(hdev, LE_LINK, dst, role); if (!conn) return ERR_PTR(-ENOMEM); hci_conn_hold(conn); @@ -1588,7 +1595,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, memcmp(conn->le_per_adv_data, base, base_len))) return ERR_PTR(-EADDRINUSE); - conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); if (!conn) return ERR_PTR(-ENOMEM); @@ -1632,7 +1639,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t 
*dst, BT_DBG("requesting refresh of dst_addr"); - conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER); + conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER); if (!conn) return ERR_PTR(-ENOMEM); @@ -1680,7 +1687,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { - acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); + acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); if (!acl) return ERR_PTR(-ENOMEM); } @@ -1740,7 +1747,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { - sco = hci_conn_add(hdev, type, dst, HCI_ROLE_MASTER); + sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER); if (!sco) { hci_conn_drop(acl); return ERR_PTR(-ENOMEM); @@ -1932,7 +1939,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type, qos->ucast.cig, qos->ucast.cis); if (!cis) { - cis = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); if (!cis) return ERR_PTR(-ENOMEM); cis->cleanup = cis_cleanup; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 195aea2198a9..65601aa52e0d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2535,6 +2535,8 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + ida_init(&hdev->unset_handle_ida); + INIT_LIST_HEAD(&hdev->mesh_pending); INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->reject_list); @@ -2789,6 +2791,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_codec_list_clear(&hdev->local_codecs); hci_dev_unlock(hdev); + ida_destroy(&hdev->unset_handle_ida); ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); kfree_skb(hdev->recv_event); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 32fb2f102a12..0849e0dafa95 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2335,8 +2335,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) } } else { if (!conn) { - conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr, - HCI_ROLE_MASTER); + conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr, + HCI_ROLE_MASTER); if (!conn) bt_dev_err(hdev, "no memory for new connection"); } @@ -3151,8 +3151,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr, BDADDR_BREDR)) { - conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, - HCI_ROLE_SLAVE); + conn = hci_conn_add_unset(hdev, ev->link_type, + &ev->bdaddr, HCI_ROLE_SLAVE); if (!conn) { bt_dev_err(hdev, "no memory for new conn"); goto unlock; @@ -3317,8 +3317,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { - conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, - HCI_ROLE_SLAVE); + conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr, + HCI_ROLE_SLAVE); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); goto unlock; @@ -5890,7 +5890,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, if (status) goto unlock; - conn = hci_conn_add(hdev, LE_LINK, bdaddr, role); + conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); goto unlock; @@ 
-5952,17 +5952,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); - if (handle > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle, - HCI_CONN_HANDLE_MAX); - status = HCI_ERROR_INVALID_PARAMETERS; - } - /* All connection failure handling is taken care of by the * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ - if (status) + if (status || hci_conn_set_handle(conn, handle)) goto unlock; /* Drop the connection if it has been aborted */ @@ -5986,7 +5980,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, mgmt_device_connected(hdev, conn, NULL, 0); conn->sec_level = BT_SECURITY_LOW; - conn->handle = handle; conn->state = BT_CONFIG; /* Store current advertising instance as connection advertising instance @@ -6622,8 +6615,8 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, if (ev->status) { /* Add connection to indicate the failed PA sync event */ - pa_sync = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); if (!pa_sync) goto unlock; @@ -7019,12 +7012,12 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, cis = hci_conn_hash_lookup_handle(hdev, cis_handle); if (!cis) { - cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE); + cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE, + cis_handle); if (!cis) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; } - cis->handle = cis_handle; } cis->iso_qos.ucast.cig = ev->cig_id; @@ -7129,10 +7122,9 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis = hci_conn_hash_lookup_handle(hdev, handle); if (!bis) { bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, handle); if (!bis) continue; - bis->handle = handle; } if (ev->status != 0x42) @@ -7198,8 +7190,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, goto unlock; /* Add connection to indicate the PA sync event */ - pa_sync = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); if (!pa_sync) goto unlock; -- cgit v1.2.3 From f4da3ee15de9944482382181329bb6d7335ca003 Mon Sep 17 00:00:00 2001 From: Claudia Draghicescu Date: Thu, 28 Sep 2023 11:02:08 +0300 Subject: Bluetooth: ISO: Copy BASE if service data matches EIR_BAA_SERVICE_UUID Copy the content of a Periodic Advertisement Report to BASE only if the service UUID is Basic Audio Announcement Service UUID. 
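For orientation, the periodic advertising report matched here carries the BASE inside a standard Service Data AD structure; the layout sketched below uses the well-known assigned numbers and is only an illustration:

        [len] [0x16 = Service Data - 16-bit UUID] [0x51 0x18 = 0x1851 Basic Audio Announcement] [BASE ...]

so only reports that actually advertise the Basic Audio Announcement Service end up copied into iso_pi(sk)->base.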
Signed-off-by: Claudia Draghicescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 8ab7ea5ebedf..07b80e97aead 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -14,6 +14,7 @@ #include #include #include +#include "eir.h" static const struct proto_ops iso_sock_ops; @@ -47,6 +48,7 @@ static void iso_sock_kill(struct sock *sk); #define EIR_SERVICE_DATA_LENGTH 4 #define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH) +#define EIR_BAA_SERVICE_UUID 0x1851 /* iso_pinfo flags values */ enum { @@ -1460,6 +1462,8 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname, len = min_t(unsigned int, len, base_len); if (copy_to_user(optval, base, len)) err = -EFAULT; + if (put_user(len, optlen)) + err = -EFAULT; break; @@ -1782,12 +1786,16 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT); if (ev3) { + size_t base_len = ev3->length; + u8 *base; + sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sync_handle_pa_report, ev3); - - if (sk) { - memcpy(iso_pi(sk)->base, ev3->data, ev3->length); - iso_pi(sk)->base_len = ev3->length; + base = eir_get_service_data(ev3->data, ev3->length, + EIR_BAA_SERVICE_UUID, &base_len); + if (base && sk && base_len <= sizeof(iso_pi(sk)->base)) { + memcpy(iso_pi(sk)->base, base, base_len); + iso_pi(sk)->base_len = base_len; } } else { sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL); -- cgit v1.2.3 From a85fb91e3d728bdfc80833167e8162cce8bc7004 Mon Sep 17 00:00:00 2001 From: ZhengHan Wang Date: Wed, 18 Oct 2023 12:30:55 +0200 Subject: Bluetooth: Fix double free in hci_conn_cleanup syzbot reports a slab use-after-free in hci_conn_hash_flush [1]. After releasing an object using hci_conn_del_sysfs in the hci_conn_cleanup function, releasing the same object again using the hci_dev_put and hci_conn_put functions causes a double free. Here's a simplified flow: hci_conn_del_sysfs: hci_dev_put put_device kobject_put kref_put kobject_release kobject_cleanup kfree_const kfree(name) hci_dev_put: ... kfree(name) hci_conn_put: put_device ... kfree(name) This patch drop the hci_dev_put and hci_conn_put function call in hci_conn_cleanup function, because the object is freed in hci_conn_del_sysfs function. This patch also fixes the refcounting in hci_conn_add_sysfs() and hci_conn_del_sysfs() to take into account device_add() failures. This fixes CVE-2023-28464. 
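The generic driver-model pattern this change follows (illustration only, not code from this patch):

        device_initialize(dev);
        ...
        if (device_add(dev) < 0) {
                /* never registered: drop the initial reference with put_device() only */
                put_device(dev);
                return;
        }
        ...
        /* once registered, tear down with: */
        device_unregister(dev);         /* device_del() + final put_device() */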
Link: https://syzkaller.appspot.com/bug?id=1bb51491ca5df96a5f724899d1dbb87afda61419 [1] Signed-off-by: ZhengHan Wang Co-developed-by: Luiz Augusto von Dentz Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 6 ++---- net/bluetooth/hci_sysfs.c | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3735764b3169..2cee330188ce 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -172,13 +172,11 @@ static void hci_conn_cleanup(struct hci_conn *conn) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); } - hci_conn_del_sysfs(conn); - debugfs_remove_recursive(conn->debugfs); - hci_dev_put(hdev); + hci_conn_del_sysfs(conn); - hci_conn_put(conn); + hci_dev_put(hdev); } static void hci_acl_create_connection(struct hci_conn *conn) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 15b33579007c..367e32fe30eb 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -35,7 +35,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); conn->dev.type = &bt_link; conn->dev.class = &bt_class; @@ -48,27 +48,30 @@ void hci_conn_add_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (device_is_registered(&conn->dev)) return; dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - if (device_add(&conn->dev) < 0) { + if (device_add(&conn->dev) < 0) bt_dev_err(hdev, "failed to register connection device"); - return; - } - - hci_dev_hold(hdev); } void hci_conn_del_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - if (!device_is_registered(&conn->dev)) + bt_dev_dbg(hdev, "conn %p", conn); + + if (!device_is_registered(&conn->dev)) { + /* If device_add() has *not* succeeded, use *only* put_device() + * to drop the reference count. 
+ */ + put_device(&conn->dev); return; + } while (1) { struct device *dev; @@ -80,9 +83,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn) put_device(dev); } - device_del(&conn->dev); - - hci_dev_put(hdev); + device_unregister(&conn->dev); } static void bt_host_release(struct device *dev) -- cgit v1.2.3 From 530886897c789cf77c9a0d4a7cc5549f0768b5f8 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Wed, 18 Oct 2023 16:47:35 +0200 Subject: Bluetooth: hci_sync: Fix Opcode prints in bt_dev_dbg/err Printed Opcodes may be missing leading zeros: Bluetooth: hci0: Opcode 0x c03 failed: -110 Fix this by always printing leading zeros: Bluetooth: hci0: Opcode 0x0c03 failed: -110 Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Signed-off-by: Marcel Ziswiler Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c6f57af88bfd..d85a7091a116 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -152,7 +152,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, struct sk_buff *skb; int err = 0; - bt_dev_dbg(hdev, "Opcode 0x%4x", opcode); + bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode); hci_req_init(&req, hdev); @@ -248,7 +248,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { if (!event) - bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", opcode, PTR_ERR(skb)); return PTR_ERR(skb); } -- cgit v1.2.3 From 06e4dd18f86876bc29786d66165f781cd0265b7c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 20:00:53 +0000 Subject: net_sched: sch_fq: fix off-by-one error in fq_dequeue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A last minute change went wrong. We need to look for a packet in all 3 bands, not only two. Fixes: 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202310201422.a22b0999-oliver.sang@intel.com Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Dave Taht Cc: Toke Høiland-Jørgensen Tested-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231020200053.675951-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 8eacdb54e72f..f6fd0de293e5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -651,7 +651,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) begin: head = fq_pband_head_select(pband); if (!head) { - while (++retry < FQ_BANDS) { + while (++retry <= FQ_BANDS) { if (++q->band_nr == FQ_BANDS) q->band_nr = 0; pband = &q->band_flows[q->band_nr]; -- cgit v1.2.3 From 81a4169856987b65918a93e9b9f53a28496575cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2023 20:12:54 +0000 Subject: net_sched: sch_fq: fastpath needs to take care of sk->sk_pacing_status If packets of a TCP flows take the fast path, we need to make sure sk->sk_pacing_status is set to SK_PACING_FQ otherwise TCP might fallback to internal pacing, which is not optimal. 
Fixes: 076433bd78d7 ("net_sched: sch_fq: add fast path for mostly idle qdisc") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231020201254.732527-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f6fd0de293e5..bf9d00518a60 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -383,6 +383,10 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, if (fq_fastpath_check(sch, skb, now)) { q->internal.stat_fastpath_packets++; + if (skb->sk == sk && q->rate_enable && + READ_ONCE(sk->sk_pacing_status) != SK_PACING_FQ) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); return &q->internal; } -- cgit v1.2.3 From f862ed2d0bf0cf51c28c1a69e3c2a1558d5a2978 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 21 Oct 2023 13:27:02 +0200 Subject: genetlink: don't merge dumpit split op for different cmds into single iter Currently, split ops of doit and dumpit are merged into a single iter item when they are subsequent. However, there is no guarantee that the dumpit op is for the same cmd as doit op. Fix this by checking if cmd is the same for both. This problem does not occur in existing families. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231021112711.660606-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/netlink/genetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8315d31b53db..92ef5ed2e7b0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -225,7 +225,8 @@ static void genl_op_from_split(struct genl_op_iter *iter) } if (i + cnt < family->n_split_ops && - family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP) { + family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP && + (!cnt || family->split_ops[i + cnt].cmd == iter->doit.cmd)) { iter->dumpit = family->split_ops[i + cnt]; genl_op_fill_in_reject_policy_split(family, &iter->dumpit); cnt++; -- cgit v1.2.3 From 53590934ba9549c55c57a32e2a6980139af00345 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 21 Oct 2023 13:27:08 +0200 Subject: devlink: rename netlink callback to be aligned with the generated ones All remaining doit and dumpit netlink callback functions are going to be used by generated split ops. They expect certain name format. Rename the callback to be aligned with generated names. 
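To make the expected name format concrete, a hedged sketch (handler names below are taken from this patch; the exact rule is implemented by the ynl code generator under tools/net/ynl):

	struct sk_buff;
	struct genl_info;

	/* before: hand-written small_ops naming */
	int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info);

	/* after: <family>_nl_<command>_{doit,dumpit}, which is what the generated
	 * netlink_gen.{c,h} tables reference for DEVLINK_CMD_PORT_SET going forward.
	 */
	int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info);
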
Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231021112711.660606-8-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/dev.c | 10 ++-- net/devlink/devl_internal.h | 108 ++++++++++++++++++++++---------------------- net/devlink/dpipe.c | 14 +++--- net/devlink/health.c | 24 +++++----- net/devlink/linecard.c | 3 +- net/devlink/netlink.c | 82 ++++++++++++++++----------------- net/devlink/param.c | 14 +++--- net/devlink/port.c | 11 ++--- net/devlink/rate.c | 6 +-- net/devlink/region.c | 8 ++-- net/devlink/resource.c | 4 +- net/devlink/sb.c | 17 ++++--- net/devlink/trap.c | 9 ++-- 13 files changed, 152 insertions(+), 158 deletions(-) (limited to 'net') diff --git a/net/devlink/dev.c b/net/devlink/dev.c index dc8039ca2b38..4fc7adb32663 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -492,7 +492,7 @@ free_msg: return -EMSGSIZE; } -int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_reload_action action; @@ -658,7 +658,7 @@ nla_put_failure: return err; } -int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; @@ -679,7 +679,7 @@ int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; @@ -1108,7 +1108,7 @@ static int devlink_flash_component_get(struct devlink *devlink, return 0; } -int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; @@ -1351,7 +1351,7 @@ static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; -int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; struct devlink *devlink = info->user_ptr[0]; diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 741d1bf1bec8..daf4c696a618 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -229,65 +229,63 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, unsigned int devlink_linecard_index(struct devlink_linecard *linecard); /* Devlink nl cmds */ -int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, +int 
devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, +int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, +int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb); -int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, +int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_param_get_dumpit(struct 
sk_buff *msg, + struct netlink_callback *cb); +int devlink_nl_port_param_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_port_param_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_region_read_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, +int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, +int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, +int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index 431227c412e5..a72a9292efc5 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -289,7 +289,7 @@ err_table_put: return err; } -int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const char *table_name = NULL; @@ -562,8 +562,8 @@ send_done: return genlmsg_reply(dump_ctx.skb, info); } -int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_dpipe_table *table; @@ -712,8 +712,8 @@ err_table_put: return err; } -int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb, + struct genl_info *info) { 
struct devlink *devlink = info->user_ptr[0]; @@ -746,8 +746,8 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const char *table_name; diff --git a/net/devlink/health.c b/net/devlink/health.c index 89405e59f45c..695df61f8ac2 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -452,8 +452,8 @@ int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb, devlink_nl_health_reporter_get_dump_one); } -int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -655,8 +655,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, } EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); -int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -1108,8 +1108,8 @@ nla_put_failure: return err; } -int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -1163,8 +1163,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) return reporter; } -int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) +int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_health_reporter *reporter; @@ -1202,8 +1202,8 @@ unlock: return err; } -int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -1219,8 +1219,8 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, return 0; } -int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 9ff1813f88c5..2f1c317b64cd 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -369,8 +369,7 @@ out: return err; } -int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 809bfc3ba8c4..ca63e59a5e92 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -291,200 +291,200 @@ static const struct genl_small_ops devlink_nl_small_ops[40] = { { .cmd = 
DEVLINK_CMD_PORT_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_set_doit, + .doit = devlink_nl_port_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_RATE_SET, - .doit = devlink_nl_cmd_rate_set_doit, + .doit = devlink_nl_rate_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_RATE_NEW, - .doit = devlink_nl_cmd_rate_new_doit, + .doit = devlink_nl_rate_new_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_RATE_DEL, - .doit = devlink_nl_cmd_rate_del_doit, + .doit = devlink_nl_rate_del_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_split_doit, + .doit = devlink_nl_port_split_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_unsplit_doit, + .doit = devlink_nl_port_unsplit_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_NEW, - .doit = devlink_nl_cmd_port_new_doit, + .doit = devlink_nl_port_new_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_PORT_DEL, - .doit = devlink_nl_cmd_port_del_doit, + .doit = devlink_nl_port_del_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_LINECARD_SET, - .doit = devlink_nl_cmd_linecard_set_doit, + .doit = devlink_nl_linecard_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_SB_POOL_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_pool_set_doit, + .doit = devlink_nl_sb_pool_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_port_pool_set_doit, + .doit = devlink_nl_sb_port_pool_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, + .doit = devlink_nl_sb_tc_pool_bind_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_occ_snapshot_doit, + .doit = devlink_nl_sb_occ_snapshot_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_occ_max_clear_doit, + .doit = devlink_nl_sb_occ_max_clear_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_ESWITCH_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_eswitch_get_doit, + .doit = devlink_nl_eswitch_get_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_eswitch_set_doit, + .doit = devlink_nl_eswitch_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_table_get, + .doit = devlink_nl_dpipe_table_get_doit, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_entries_get, + .doit = devlink_nl_dpipe_entries_get_doit, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_headers_get, + .doit = devlink_nl_dpipe_headers_get_doit, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_table_counters_set, + .doit = devlink_nl_dpipe_table_counters_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_resource_set, + .doit = devlink_nl_resource_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_resource_dump, + .doit = devlink_nl_resource_dump_doit, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_reload, + .doit = devlink_nl_reload_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_PARAM_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_param_set_doit, + .doit = devlink_nl_param_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_param_get_doit, - .dumpit = devlink_nl_cmd_port_param_get_dumpit, + .doit = devlink_nl_port_param_get_doit, + .dumpit = devlink_nl_port_param_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_PARAM_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_param_set_doit, + .doit = devlink_nl_port_param_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_REGION_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_region_new, + .doit = devlink_nl_region_new_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_REGION_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_region_del, + .doit = devlink_nl_region_del_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_REGION_READ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_cmd_region_read_dumpit, + .dumpit = devlink_nl_region_read_dumpit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_set_doit, + .doit = devlink_nl_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_recover_doit, + .doit = devlink_nl_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_diagnose_doit, + .doit = 
devlink_nl_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, @@ -492,47 +492,47 @@ static const struct genl_small_ops devlink_nl_small_ops[40] = { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, + .dumpit = devlink_nl_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, + .doit = devlink_nl_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_test_doit, + .doit = devlink_nl_health_reporter_test_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, }, { .cmd = DEVLINK_CMD_FLASH_UPDATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_flash_update, + .doit = devlink_nl_flash_update_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_TRAP_SET, - .doit = devlink_nl_cmd_trap_set_doit, + .doit = devlink_nl_trap_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_TRAP_GROUP_SET, - .doit = devlink_nl_cmd_trap_group_set_doit, + .doit = devlink_nl_trap_group_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_TRAP_POLICER_SET, - .doit = devlink_nl_cmd_trap_policer_set_doit, + .doit = devlink_nl_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = DEVLINK_CMD_SELFTESTS_RUN, - .doit = devlink_nl_cmd_selftests_run, + .doit = devlink_nl_selftests_run_doit, .flags = GENL_ADMIN_PERM, }, /* -- No new ops here! Use split ops going forward! 
-- */ diff --git a/net/devlink/param.c b/net/devlink/param.c index 31275f9d4cb7..d74df09311a9 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -581,7 +581,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return 0; } -int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -589,22 +589,22 @@ int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info) info, DEVLINK_CMD_PARAM_NEW); } -int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +int devlink_nl_port_param_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) { NL_SET_ERR_MSG(cb->extack, "Port params are not supported"); return msg->len; } -int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_port_param_get_doit(struct sk_buff *skb, + struct genl_info *info) { NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; } -int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_port_param_set_doit(struct sk_buff *skb, + struct genl_info *info) { NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; diff --git a/net/devlink/port.c b/net/devlink/port.c index 4e9003242448..7634f187fa50 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -772,7 +772,7 @@ static int devlink_port_function_set(struct devlink_port *port, return err; } -int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; int err; @@ -798,7 +798,7 @@ int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) return 0; } -int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -829,8 +829,7 @@ int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) info->extack); } -int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -840,7 +839,7 @@ int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack); } -int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink_port_new_attrs new_attrs = {}; @@ -904,7 +903,7 @@ err_out_port_del: return err; } -int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct netlink_ext_ack *extack = info->extack; diff --git a/net/devlink/rate.c b/net/devlink/rate.c index dff1593b8406..94b289b93ff2 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -458,7 +458,7 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, return true; } -int 
devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *devlink_rate; @@ -480,7 +480,7 @@ int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } -int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; @@ -536,7 +536,7 @@ err_strdup: return err; } -int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; diff --git a/net/devlink/region.c b/net/devlink/region.c index d197cdb662db..0aab7b82d678 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -588,7 +588,7 @@ int devlink_nl_region_get_dumpit(struct sk_buff *skb, return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one); } -int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; @@ -633,7 +633,7 @@ int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info) return 0; } -int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; @@ -863,8 +863,8 @@ devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, curr_offset, chunk_size, chunk); } -int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) +int devlink_nl_region_read_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct devlink_nl_dump_state *state = devlink_dump_state(cb); diff --git a/net/devlink/resource.c b/net/devlink/resource.c index c8b615e4c385..594c8aeb3bfa 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -105,7 +105,7 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size, return err; } -int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; @@ -285,7 +285,7 @@ err_resource_put: return err; } -int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; diff --git a/net/devlink/sb.c b/net/devlink/sb.c index bd677fff5ec8..0a76bb32502b 100644 --- a/net/devlink/sb.c +++ b/net/devlink/sb.c @@ -413,7 +413,7 @@ static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_threshold_type threshold_type; @@ -621,8 +621,8 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, return -EOPNOTSUPP; } -int 
devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -861,8 +861,8 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -900,8 +900,7 @@ int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, pool_index, threshold, info->extack); } -int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; @@ -916,8 +915,8 @@ int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; diff --git a/net/devlink/trap.c b/net/devlink/trap.c index c26bf9b29bca..c26313e7ca08 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -414,7 +414,7 @@ static int devlink_trap_action_set(struct devlink *devlink, info->extack); } -int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; @@ -684,8 +684,7 @@ static int devlink_trap_group_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; @@ -926,8 +925,8 @@ devlink_trap_policer_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_trap_policer_item *policer_item; struct netlink_ext_ack *extack = info->extack; -- cgit v1.2.3 From f2f9dd164db079161a834c8698c68a94a50b4168 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 21 Oct 2023 13:27:09 +0200 Subject: netlink: specs: devlink: add the remaining command to generate complete split_ops Currently, some of the commands are not described in devlink yaml file and are manually filled in net/devlink/netlink.c in small_ops. To make all part of split_ops, add definitions of the rest of the commands alongside with needed attributes and enums. Note that this focuses on the kernel side. The requests are fully described in order to generate split_op alongside with policies. Follow-up will describe the replies in order to make the userspace helpers complete. 
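For orientation, a rough sketch of the per-command policy and split-op entry the generator derives from such a spec and emits into net/devlink/netlink_gen.c (illustrative only, not a verbatim copy of the generated file; it assumes <net/genetlink.h> and the uapi devlink attribute enums):

	/* DEVLINK_CMD_PORT_SPLIT - do request, derived from the yaml description */
	static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = {
		[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
		[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
		[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
		[DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, },
	};

	/* sketch of the corresponding generated split-ops table entry */
	static const struct genl_split_ops devlink_nl_ops_sketch[] = {
		{
			.cmd		= DEVLINK_CMD_PORT_SPLIT,
			.validate	= GENL_DONT_VALIDATE_STRICT,
			.pre_doit	= devlink_nl_pre_doit_port,
			.doit		= devlink_nl_port_split_doit,
			.post_doit	= devlink_nl_post_doit,
			.policy		= devlink_port_split_nl_policy,
			.maxattr	= DEVLINK_ATTR_PORT_SPLIT_COUNT,
			.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
		},
	};

The per-command policy is the main gain over the old small_ops table, which validated every command against the one family-wide policy.
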
Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231021112711.660606-9-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 1530 ++++++++- net/devlink/netlink_gen.c | 757 ++++- net/devlink/netlink_gen.h | 64 +- tools/net/ynl/generated/devlink-user.c | 5062 ++++++++++++++++++++++++++---- tools/net/ynl/generated/devlink-user.h | 4213 ++++++++++++++++++++++--- 5 files changed, 10495 insertions(+), 1131 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index dd035a8f5eb4..c6ba4889575a 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -15,6 +15,161 @@ definitions: name: ingress - name: egress + - + type: enum + name: port-type + entries: + - + name: notset + - + name: auto + - + name: eth + - + name: ib + - + type: enum + name: port-flavour + entries: + - + name: physical + - + name: cpu + - + name: dsa + - + name: pci_pf + - + name: pci_vf + - + name: virtual + - + name: unused + - + name: pci_sf + - + type: enum + name: port-fn-state + entries: + - + name: inactive + - + name: active + - + type: enum + name: port-fn-opstate + entries: + - + name: detached + - + name: attached + - + type: enum + name: port-fn-attr-cap + entries: + - + name: roce-bit + - + name: migratable-bit + - + type: enum + name: sb-threshold-type + entries: + - + name: static + - + name: dynamic + - + type: enum + name: eswitch-mode + entries: + - + name: legacy + - + name: switchdev + - + type: enum + name: eswitch-inline-mode + entries: + - + name: none + - + name: link + - + name: network + - + name: transport + - + type: enum + name: eswitch-encap-mode + entries: + - + name: none + - + name: basic + - + type: enum + name: dpipe-match-type + entries: + - + name: field-exact + - + type: enum + name: dpipe-action-type + entries: + - + name: field-modify + - + type: enum + name: dpipe-field-mapping-type + entries: + - + name: none + - + name: ifindex + - + type: enum + name: resource-unit + entries: + - + name: entry + - + type: enum + name: reload-action + entries: + - + name: driver-reinit + value: 1 + - + name: fw-activate + - + type: enum + name: param-cmode + entries: + - + name: runtime + - + name: driverinit + - + name: permanent + - + type: enum + name: flash-overwrite + entries: + - + name: settings-bit + - + name: identifiers-bit + - + type: enum + name: trap-action + entries: + - + name: drop + - + name: trap + - + name: mirror attribute-sets: - @@ -31,6 +186,17 @@ attribute-sets: - name: port-index type: u32 + - + name: port-type + type: u16 + enum: port-type + + # TODO: fill in the attributes in between + + - + name: port-split-count + type: u32 + value: 9 # TODO: fill in the attributes in between @@ -45,18 +211,224 @@ attribute-sets: name: sb-pool-index type: u16 value: 17 - - name: sb-pool-type type: u8 enum: sb-pool-type + - + name: sb-pool-size + type: u32 + - + name: sb-pool-threshold-type + type: u8 + enum: sb-threshold-type + - + name: sb-threshold + type: u32 + - + name: sb-tc-index + type: u16 + value: 22 # TODO: fill in the attributes in between - - name: sb-tc-index + name: eswitch-mode type: u16 - value: 22 + value: 25 + enum: eswitch-mode + + - + name: eswitch-inline-mode + type: u16 + enum: eswitch-inline-mode + - + name: dpipe-tables + type: nest + nested-attributes: dl-dpipe-tables + - + name: dpipe-table + type: nest + multi-attr: true + nested-attributes: dl-dpipe-table + - + name: 
dpipe-table-name + type: string + - + name: dpipe-table-size + type: u64 + - + name: dpipe-table-matches + type: nest + nested-attributes: dl-dpipe-table-matches + - + name: dpipe-table-actions + type: nest + nested-attributes: dl-dpipe-table-actions + - + name: dpipe-table-counters-enabled + type: u8 + - + name: dpipe-entries + type: nest + nested-attributes: dl-dpipe-entries + - + name: dpipe-entry + type: nest + multi-attr: true + nested-attributes: dl-dpipe-entry + - + name: dpipe-entry-index + type: u64 + - + name: dpipe-entry-match-values + type: nest + nested-attributes: dl-dpipe-entry-match-values + - + name: dpipe-entry-action-values + type: nest + nested-attributes: dl-dpipe-entry-action-values + - + name: dpipe-entry-counter + type: u64 + - + name: dpipe-match + type: nest + multi-attr: true + nested-attributes: dl-dpipe-match + - + name: dpipe-match-value + type: nest + multi-attr: true + nested-attributes: dl-dpipe-match-value + - + name: dpipe-match-type + type: u32 + enum: dpipe-match-type + - + name: dpipe-action + type: nest + multi-attr: true + nested-attributes: dl-dpipe-action + - + name: dpipe-action-value + type: nest + multi-attr: true + nested-attributes: dl-dpipe-action-value + - + name: dpipe-action-type + type: u32 + enum: dpipe-action-type + - + name: dpipe-value + type: binary + - + name: dpipe-value-mask + type: binary + - + name: dpipe-value-mapping + type: u32 + - + name: dpipe-headers + type: nest + nested-attributes: dl-dpipe-headers + - + name: dpipe-header + type: nest + multi-attr: true + nested-attributes: dl-dpipe-header + - + name: dpipe-header-name + type: string + - + name: dpipe-header-id + type: u32 + - + name: dpipe-header-fields + type: nest + nested-attributes: dl-dpipe-header-fields + - + name: dpipe-header-global + type: u8 + - + name: dpipe-header-index + type: u32 + - + name: dpipe-field + type: nest + multi-attr: true + nested-attributes: dl-dpipe-field + - + name: dpipe-field-name + type: string + - + name: dpipe-field-id + type: u32 + - + name: dpipe-field-bitwidth + type: u32 + - + name: dpipe-field-mapping-type + type: u32 + enum: dpipe-field-mapping-type + - + name: pad + type: pad + - + name: eswitch-encap-mode + type: u8 + value: 62 + enum: eswitch-encap-mode + - + name: resource-list + type: nest + nested-attributes: dl-resource-list + - + name: resource + type: nest + multi-attr: true + nested-attributes: dl-resource + - + name: resource-name + type: string + - + name: resource-id + type: u64 + - + name: resource-size + type: u64 + - + name: resource-size-new + type: u64 + - + name: resource-size-valid + type: u8 + - + name: resource-size-min + type: u64 + - + name: resource-size-max + type: u64 + - + name: resource-size-gran + type: u64 + - + name: resource-unit + type: u8 + enum: resource-unit + - + name: resource-occ + type: u64 + - + name: dpipe-table-resource-id + type: u64 + - + name: dpipe-table-resource-units + type: u64 + - + name: port-flavour + type: u16 + enum: port-flavour # TODO: fill in the attributes in between @@ -67,17 +439,41 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: param-type + type: u8 + value: 83 + + # TODO: fill in the attributes in between + + - + name: param-value-cmode + type: u8 + enum: param-cmode + value: 87 - name: region-name type: string - value: 88 # TODO: fill in the attributes in between + - + name: region-snapshot-id + type: u32 + value: 92 + + # TODO: fill in the attributes in between + + - + name: region-chunk-addr + type: u64 + value: 96 + - + name: 
region-chunk-len + type: u64 - name: info-driver-name type: string - value: 98 - name: info-serial-number type: string @@ -105,6 +501,29 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: fmsg + type: nest + nested-attributes: dl-fmsg + value: 106 + - + name: fmsg-obj-nest-start + type: flag + - + name: fmsg-pair-nest-start + type: flag + - + name: fmsg-arr-nest-start + type: flag + - + name: fmsg-nest-end + type: flag + - + name: fmsg-obj-name + type: string + + # TODO: fill in the attributes in between + - name: health-reporter-name type: string @@ -112,10 +531,37 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: health-reporter-graceful-period + type: u64 + value: 120 + - + name: health-reporter-auto-recover + type: u8 + - + name: flash-update-file-name + type: string + - + name: flash-update-component + type: string + + # TODO: fill in the attributes in between + + - + name: port-pci-pf-number + type: u16 + value: 127 + + # TODO: fill in the attributes in between + - name: trap-name type: string value: 130 + - + name: trap-action + type: u8 + enum: trap-action # TODO: fill in the attributes in between @@ -131,23 +577,68 @@ attribute-sets: # TODO: fill in the attributes in between - - name: trap-policer-id + name: netns-fd + type: u32 + value: 138 + - + name: netns-pid + type: u32 + - + name: netns-id type: u32 - value: 142 # TODO: fill in the attributes in between - - name: reload-action + name: health-reporter-auto-dump type: u8 - value: 153 + value: 141 + - + name: trap-policer-id + type: u32 + - + name: trap-policer-rate + type: u64 + - + name: trap-policer-burst + type: u64 + - + name: port-function + type: nest + nested-attributes: dl-port-function + + # TODO: fill in the attributes in between + + - + name: port-controller-number + type: u32 + value: 150 # TODO: fill in the attributes in between + - + name: flash-update-overwrite-mask + type: bitfield32 + enum: flash-overwrite + enum-as-flags: True + value: 152 + - + name: reload-action + type: u8 + enum: reload-action + - + name: reload-actions-performed + type: bitfield32 + enum: reload-action + enum-as-flags: True + - + name: reload-limits + type: bitfield32 + enum: reload-action + enum-as-flags: True - name: dev-stats type: nest - value: 156 nested-attributes: dl-dev-stats - name: reload-stats @@ -181,10 +672,26 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: port-pci-sf-number + type: u32 + value: 164 + + # TODO: fill in the attributes in between + + - + name: rate-tx-share + type: u64 + value: 166 + - + name: rate-tx-max + type: u64 - name: rate-node-name type: string - value: 168 + - + name: rate-parent-node-name + type: string # TODO: fill in the attributes in between @@ -193,6 +700,30 @@ attribute-sets: type: u32 value: 171 + # TODO: fill in the attributes in between + + - + name: linecard-type + type: string + value: 173 + + # TODO: fill in the attributes in between + + - + name: selftests + type: nest + value: 176 + nested-attributes: dl-selftest-id + - + name: rate-tx-priority + type: u32 + - + name: rate-tx-weight + type: u32 + - + name: region-direct + type: flag + - name: dl-dev-stats subset-of: devlink @@ -222,21 +753,276 @@ attribute-sets: - name: reload-stats-entry - - name: dl-reload-stats-entry + name: dl-reload-stats-entry + subset-of: devlink + attributes: + - + name: reload-stats-limit + - + name: reload-stats-value + - + name: dl-info-version + subset-of: devlink + attributes: + - + name: info-version-name + - + name: info-version-value + 
- + name: dl-port-function + name-prefix: devlink-port-fn-attr- + attr-max-name: devlink-port-function-attr-max + attributes: + - + name-prefix: devlink-port-function-attr- + name: hw-addr + type: binary + value: 1 + - + name: state + type: u8 + enum: port-fn-state + - + name: opstate + type: u8 + enum: port-fn-opstate + - + name: caps + type: bitfield32 + enum: port-fn-attr-cap + enum-as-flags: True + + - + name: dl-dpipe-tables + subset-of: devlink + attributes: + - + name: dpipe-table + + - + name: dl-dpipe-table + subset-of: devlink + attributes: + - + name: dpipe-table-name + - + name: dpipe-table-size + - + name: dpipe-table-name + - + name: dpipe-table-size + - + name: dpipe-table-matches + - + name: dpipe-table-actions + - + name: dpipe-table-counters-enabled + - + name: dpipe-table-resource-id + - + name: dpipe-table-resource-units + + - + name: dl-dpipe-table-matches + subset-of: devlink + attributes: + - + name: dpipe-match + + - + name: dl-dpipe-table-actions + subset-of: devlink + attributes: + - + name: dpipe-action + + - + name: dl-dpipe-entries + subset-of: devlink + attributes: + - + name: dpipe-entry + + - + name: dl-dpipe-entry + subset-of: devlink + attributes: + - + name: dpipe-entry-index + - + name: dpipe-entry-match-values + - + name: dpipe-entry-action-values + - + name: dpipe-entry-counter + + - + name: dl-dpipe-entry-match-values + subset-of: devlink + attributes: + - + name: dpipe-match-value + + - + name: dl-dpipe-entry-action-values + subset-of: devlink + attributes: + - + name: dpipe-action-value + + - + name: dl-dpipe-match + subset-of: devlink + attributes: + - + name: dpipe-match-type + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-index + - + name: dpipe-field-id + + - + name: dl-dpipe-match-value + subset-of: devlink + attributes: + - + name: dpipe-match + - + name: dpipe-value + - + name: dpipe-value-mask + - + name: dpipe-value-mapping + + - + name: dl-dpipe-action + subset-of: devlink + attributes: + - + name: dpipe-action-type + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-index + - + name: dpipe-field-id + + - + name: dl-dpipe-action-value + subset-of: devlink + attributes: + - + name: dpipe-action + - + name: dpipe-value + - + name: dpipe-value-mask + - + name: dpipe-value-mapping + + - + name: dl-dpipe-headers + subset-of: devlink + attributes: + - + name: dpipe-header + + - + name: dl-dpipe-header + subset-of: devlink + attributes: + - + name: dpipe-header-name + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-fields + + - + name: dl-dpipe-header-fields + subset-of: devlink + attributes: + - + name: dpipe-field + + - + name: dl-dpipe-field + subset-of: devlink + attributes: + - + name: dpipe-field-name + - + name: dpipe-field-id + - + name: dpipe-field-bitwidth + - + name: dpipe-field-mapping-type + + - + name: dl-resource + subset-of: devlink + attributes: + # - + # name: resource-list + # This is currently unsupported due to circular dependency + - + name: resource-name + - + name: resource-id + - + name: resource-size + - + name: resource-size-new + - + name: resource-size-valid + - + name: resource-size-min + - + name: resource-size-max + - + name: resource-size-gran + - + name: resource-unit + - + name: resource-occ + + - + name: dl-resource-list + subset-of: devlink + attributes: + - + name: resource + + - + name: dl-fmsg subset-of: devlink attributes: - - name: reload-stats-limit + name: fmsg-obj-nest-start - - name: 
reload-stats-value + name: fmsg-pair-nest-start + - + name: fmsg-arr-nest-start + - + name: fmsg-nest-end + - + name: fmsg-obj-name + - - name: dl-info-version - subset-of: devlink + name: dl-selftest-id + name-prefix: devlink-attr-selftest-id- attributes: - - name: info-version-name - - - name: info-version-value + name: flash + type: flag operations: enum-model: directional @@ -287,8 +1073,84 @@ operations: reply: value: 3 # due to a bug, port dump returns DEVLINK_CMD_NEW attributes: *port-id-attrs + - + name: port-set + doc: Set devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-type + - port-function + + - + name: port-new + doc: Create devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-flavour + - port-pci-pf-number + - port-pci-sf-number + - port-controller-number + reply: + value: 7 + attributes: *port-id-attrs + + - + name: port-del + doc: Delete devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs - # TODO: fill in the operations in between + - + name: port-split + doc: Split devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-split-count + + - + name: port-unsplit + doc: Unplit devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs - name: sb-get @@ -312,8 +1174,6 @@ operations: attributes: *dev-id-attrs reply: *sb-get-reply - # TODO: fill in the operations in between - - name: sb-pool-get doc: Get shared buffer pool instances. @@ -337,7 +1197,23 @@ operations: attributes: *dev-id-attrs reply: *sb-pool-get-reply - # TODO: fill in the operations in between + - + name: sb-pool-set + doc: Set shared buffer pool instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - sb-index + - sb-pool-index + - sb-pool-threshold-type + - sb-pool-size - name: sb-port-pool-get @@ -363,34 +1239,263 @@ operations: attributes: *dev-id-attrs reply: *sb-port-pool-get-reply - # TODO: fill in the operations in between + - + name: sb-port-pool-set + doc: Set shared buffer port-pool combinations and threshold. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-index + - sb-threshold + + - + name: sb-tc-pool-bind-get + doc: Get shared buffer port-TC to pool bindings and threshold. 
+ attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + value: 23 + attributes: &sb-tc-pool-bind-id-attrs + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-type + - sb-tc-index + reply: &sb-tc-pool-bind-get-reply + value: 25 + attributes: *sb-tc-pool-bind-id-attrs + dump: + request: + attributes: *dev-id-attrs + reply: *sb-tc-pool-bind-get-reply + + - + name: sb-tc-pool-bind-set + doc: Set shared buffer port-TC to pool bindings and threshold. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-index + - sb-pool-type + - sb-tc-index + - sb-threshold + + - + name: sb-occ-snapshot + doc: Take occupancy snapshot of shared buffer. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + value: 27 + attributes: + - bus-name + - dev-name + - sb-index + + - + name: sb-occ-max-clear + doc: Clear occupancy watermarks of shared buffer. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - sb-index + + - + name: eswitch-get + doc: Get eswitch attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: *dev-id-attrs + reply: + value: 29 + attributes: &eswitch-attrs + - bus-name + - dev-name + - eswitch-mode + - eswitch-inline-mode + - eswitch-encap-mode + + - + name: eswitch-set + doc: Set eswitch attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: *eswitch-attrs + + - + name: dpipe-table-get + doc: Get dpipe table attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + reply: + value: 31 + attributes: + - bus-name + - dev-name + - dpipe-tables + + - + name: dpipe-entries-get + doc: Get dpipe entries attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + reply: + attributes: + - bus-name + - dev-name + - dpipe-entries + + - + name: dpipe-headers-get + doc: Get dpipe headers attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + reply: + attributes: + - bus-name + - dev-name + - dpipe-headers + + - + name: dpipe-table-counters-set + doc: Set dpipe counter attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + - dpipe-table-counters-enabled + + - + name: resource-set + doc: Set resource attributes. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - resource-id + - resource-size + + - + name: resource-dump + doc: Get resource attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + reply: + value: 36 + attributes: + - bus-name + - dev-name + - resource-list - - name: sb-tc-pool-bind-get - doc: Get shared buffer port-TC to pool bindings and threshold. + name: reload + doc: Reload devlink. attribute-set: devlink dont-validate: [ strict ] + flags: [ admin-perm ] do: - pre: devlink-nl-pre-doit-port + pre: devlink-nl-pre-doit post: devlink-nl-post-doit request: - value: 23 - attributes: &sb-tc-pool-bind-id-attrs + attributes: - bus-name - dev-name - - port-index - - sb-index - - sb-pool-type - - sb-tc-index - reply: &sb-tc-pool-bind-get-reply - value: 25 - attributes: *sb-tc-pool-bind-id-attrs - dump: - request: - attributes: *dev-id-attrs - reply: *sb-tc-pool-bind-get-reply - - # TODO: fill in the operations in between + - reload-action + - reload-limits + - netns-pid + - netns-fd + - netns-id + reply: + attributes: + - bus-name + - dev-name + - reload-actions-performed - name: param-get @@ -401,20 +1506,34 @@ operations: pre: devlink-nl-pre-doit post: devlink-nl-post-doit request: - value: 38 attributes: ¶m-id-attrs - bus-name - dev-name - param-name reply: ¶m-get-reply - value: 38 attributes: *param-id-attrs dump: request: attributes: *dev-id-attrs reply: *param-get-reply - # TODO: fill in the operations in between + - + name: param-set + doc: Set param instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - param-name + - param-type + # param-value-data is missing here as the type is variable + - param-value-cmode - name: region-get @@ -439,7 +1558,91 @@ operations: attributes: *dev-id-attrs reply: *region-get-reply - # TODO: fill in the operations in between + - + name: region-new + doc: Create region snapshot. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + value: 44 + attributes: ®ion-snapshot-id-attrs + - bus-name + - dev-name + - port-index + - region-name + - region-snapshot-id + reply: + value: 44 + attributes: *region-snapshot-id-attrs + + - + name: region-del + doc: Delete region snapshot. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *region-snapshot-id-attrs + + - + name: region-read + doc: Read region data. + attribute-set: devlink + dont-validate: [ dump-strict ] + flags: [ admin-perm ] + dump: + request: + attributes: + - bus-name + - dev-name + - port-index + - region-name + - region-snapshot-id + - region-direct + - region-chunk-addr + - region-chunk-len + reply: + value: 46 + attributes: + - bus-name + - dev-name + - port-index + - region-name + + - + name: port-param-get + doc: Get port param instances. 
+ attribute-set: devlink + dont-validate: [ strict, dump-strict ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs + reply: + attributes: *port-id-attrs + dump: + reply: + attributes: *port-id-attrs + + - + name: port-param-set + doc: Set port param instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs - name: info-get @@ -486,7 +1689,91 @@ operations: attributes: *port-id-attrs reply: *health-reporter-get-reply - # TODO: fill in the operations in between + - + name: health-reporter-set + doc: Set health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - health-reporter-name + - health-reporter-graceful-period + - health-reporter-auto-recover + - health-reporter-auto-dump + + - + name: health-reporter-recover + doc: Recover health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: health-reporter-diagnose + doc: Diagnose health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: health-reporter-dump-get + doc: Dump health reporter instances. + attribute-set: devlink + dont-validate: [ dump-strict ] + flags: [ admin-perm ] + dump: + request: + attributes: *health-reporter-id-attrs + reply: + value: 56 + attributes: + - fmsg + + - + name: health-reporter-dump-clear + doc: Clear dump of health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: flash-update + doc: Flash update devlink instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - flash-update-file-name + - flash-update-component + - flash-update-overwrite-mask - name: trap-get @@ -510,7 +1797,21 @@ operations: attributes: *dev-id-attrs reply: *trap-get-reply - # TODO: fill in the operations in between + - + name: trap-set + doc: Set trap instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-name + - trap-action - name: trap-group-get @@ -534,7 +1835,22 @@ operations: attributes: *dev-id-attrs reply: *trap-group-get-reply - # TODO: fill in the operations in between + - + name: trap-group-set + doc: Set trap group instances. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-group-name + - trap-action + - trap-policer-id
- name: trap-policer-get @@ -558,7 +1874,35 @@ operations: attributes: *dev-id-attrs reply: *trap-policer-get-reply - # TODO: fill in the operations in between
+ - + name: trap-policer-set + doc: Set trap policer instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-policer-id + - trap-policer-rate + - trap-policer-burst +
+ - + name: health-reporter-test + doc: Test health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + value: 73 + attributes: *health-reporter-id-attrs
- name: rate-get @@ -583,7 +1927,60 @@ operations: attributes: *dev-id-attrs reply: *rate-get-reply - # TODO: fill in the operations in between
+ - + name: rate-set + doc: Set rate instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name + - rate-tx-share + - rate-tx-max + - rate-tx-priority + - rate-tx-weight + - rate-parent-node-name +
+ - + name: rate-new + doc: Create rate instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name + - rate-tx-share + - rate-tx-max + - rate-tx-priority + - rate-tx-weight + - rate-parent-node-name +
+ - + name: rate-del + doc: Delete rate instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name
- name: linecard-get @@ -607,7 +2004,21 @@ operations: attributes: *dev-id-attrs reply: *linecard-get-reply - # TODO: fill in the operations in between
+ - + name: linecard-set + doc: Set line card instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - linecard-index + - linecard-type
- name: selftests-get @@ -625,3 +2036,18 @@ operations: attributes: *dev-id-attrs dump: reply: *selftests-get-reply +
+ - + name: selftests-run + doc: Run device selftest instances. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - selftests diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 467b7a431de1..9cbae0169249 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -10,6 +10,18 @@ #include +/* Common nested types */ +const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, + [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(3), +}; + +const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, }, +}; + /* DEVLINK_CMD_GET - do */ static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -29,6 +41,48 @@ static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_PORT_SET - do */ +static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_PORT_FUNCTION + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_MAX(NLA_U16, 3), + [DEVLINK_ATTR_PORT_FUNCTION] = NLA_POLICY_NESTED(devlink_dl_port_function_nl_policy), +}; + +/* DEVLINK_CMD_PORT_NEW - do */ +static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_SF_NUMBER + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_FLAVOUR] = NLA_POLICY_MAX(NLA_U16, 7), + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16, }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_DEL - do */ +static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_SPLIT - do */ +static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_UNSPLIT - do */ +static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_SB_GET - do */ static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -56,6 +110,16 @@ static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_D [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, 
}, }; +/* DEVLINK_CMD_SB_POOL_SET - do */ +static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_SB_PORT_POOL_GET - do */ static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -71,6 +135,16 @@ static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_A [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ +static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_SB_THRESHOLD + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -87,6 +161,100 @@ static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLIN [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ +static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ +static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ +static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_ESWITCH_GET - do */ +static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_ESWITCH_SET - do */ +static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3), + 
[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ +static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ +static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ +static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ +static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8, }, +}; + +/* DEVLINK_CMD_RESOURCE_SET - do */ +static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_RESOURCE_SIZE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_RESOURCE_DUMP - do */ +static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RELOAD - do */ +static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMITS + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, 1, 2), + [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(6), + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32, }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_PARAM_GET - do */ static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -100,6 +268,15 @@ static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_PARAM_SET - do */ +static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_VALUE_CMODE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8, }, + [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), +}; + /* DEVLINK_CMD_REGION_GET - do */ static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -114,6 +291,50 @@ static const 
struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DE [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_REGION_NEW - do */ +static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_REGION_DEL - do */ +static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_REGION_READ - dump */ +static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION_DIRECT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG, }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64, }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_PORT_PARAM_GET - do */ +static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_PARAM_SET - do */ +static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_INFO_GET - do */ static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -135,6 +356,58 @@ static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLIN [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; +/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ +static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ +static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* 
DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ +static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ +static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ +static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_FLASH_UPDATE - do */ +static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = NLA_POLICY_BITFIELD32(3), +}; + /* DEVLINK_CMD_TRAP_GET - do */ static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -148,6 +421,14 @@ static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_SET - do */ +static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_TRAP_ACTION + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), +}; + /* DEVLINK_CMD_TRAP_GROUP_GET - do */ static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -161,6 +442,15 @@ static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATT [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_GROUP_SET - do */ +static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), + [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_TRAP_POLICER_GET - do */ static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -174,6 +464,23 @@ static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_A [DEVLINK_ATTR_DEV_NAME] = { .type = 
NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_POLICER_SET - do */ +static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_BURST + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ +static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_RATE_GET - do */ static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -188,6 +495,37 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_RATE_SET - do */ +static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RATE_NEW - do */ +static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RATE_DEL - do */ +static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_LINECARD_GET - do */ static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -201,14 +539,29 @@ static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_LINECARD_SET - do */ +static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_SELFTESTS_GET - do */ static const struct nla_policy 
devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SELFTESTS_RUN - do */ +static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELFTESTS + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[32] = { +const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -242,6 +595,56 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_PORT_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_FUNCTION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_NEW, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_port_new_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_new_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_PCI_SF_NUMBER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_DEL, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_del_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_del_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_SPLIT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_split_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_split_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_SPLIT_COUNT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_UNSPLIT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_unsplit_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_unsplit_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -276,6 +679,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_SB_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_pool_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_pool_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -293,6 +706,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_sb_port_pool_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_port_pool_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_THRESHOLD, + .flags = GENL_ADMIN_PERM | 
GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -310,6 +733,126 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_sb_tc_pool_bind_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_tc_pool_bind_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_occ_snapshot_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_occ_snapshot_nl_policy, + .maxattr = DEVLINK_ATTR_SB_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_occ_max_clear_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_occ_max_clear_nl_policy, + .maxattr = DEVLINK_ATTR_SB_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_eswitch_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_eswitch_get_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_eswitch_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_eswitch_set_nl_policy, + .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_table_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_table_get_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_entries_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_entries_get_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_headers_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_headers_get_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_table_counters_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_table_counters_set_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RESOURCE_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_resource_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_resource_set_nl_policy, + .maxattr = DEVLINK_ATTR_RESOURCE_SIZE, + .flags = 
GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_resource_dump_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_resource_dump_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RELOAD, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_reload_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_reload_nl_policy, + .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -327,6 +870,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_param_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_param_set_nl_policy, + .maxattr = DEVLINK_ATTR_PARAM_VALUE_CMODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_REGION_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -344,6 +897,60 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_REGION_NEW, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_region_new_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_region_new_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_REGION_DEL, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_region_del_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_region_del_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_REGION_READ, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_region_read_dumpit, + .policy = devlink_region_read_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_DIRECT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_param_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_param_get_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_port_param_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_param_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_param_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_INFO_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -377,6 +984,64 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_PORT_INDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = 
devlink_nl_health_reporter_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_set_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_recover_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_recover_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_diagnose_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_diagnose_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_health_reporter_dump_get_dumpit, + .policy = devlink_health_reporter_dump_get_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_dump_clear_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_dump_clear_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_FLASH_UPDATE, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_flash_update_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_flash_update_nl_policy, + .maxattr = DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_TRAP_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -394,6 +1059,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_TRAP_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_trap_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_trap_set_nl_policy, + .maxattr = DEVLINK_ATTR_TRAP_ACTION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -411,6 +1086,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_TRAP_GROUP_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_trap_group_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_trap_group_set_nl_policy, + .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -428,6 +1113,26 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_TRAP_POLICER_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_trap_policer_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = 
devlink_trap_policer_set_nl_policy, + .maxattr = DEVLINK_ATTR_TRAP_POLICER_BURST, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_test_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_test_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_RATE_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -445,6 +1150,36 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_RATE_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_rate_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_rate_set_nl_policy, + .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RATE_NEW, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_rate_new_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_rate_new_nl_policy, + .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RATE_DEL, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_rate_del_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_rate_del_nl_policy, + .maxattr = DEVLINK_ATTR_RATE_NODE_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_LINECARD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -462,6 +1197,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .maxattr = DEVLINK_ATTR_DEV_NAME, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_LINECARD_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_linecard_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_linecard_set_nl_policy, + .maxattr = DEVLINK_ATTR_LINECARD_TYPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, { .cmd = DEVLINK_CMD_SELFTESTS_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -478,4 +1223,14 @@ const struct genl_split_ops devlink_nl_ops[32] = { .dumpit = devlink_nl_selftests_get_dumpit, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = DEVLINK_CMD_SELFTESTS_RUN, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_selftests_run_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_selftests_run_nl_policy, + .maxattr = DEVLINK_ATTR_SELFTESTS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index f8bbc93e39be..0e9e89c31c31 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -11,8 +11,12 @@ #include +/* Common nested types */ +extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1]; +extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; + /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[32]; +extern const struct genl_split_ops devlink_nl_ops[73]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -30,25 +34,61 @@ int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int 
devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info); int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_region_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_region_read_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_port_param_get_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_port_param_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_port_param_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_info_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); @@ -56,24 +96,46 @@ int devlink_nl_health_reporter_get_doit(struct sk_buff *skb, struct genl_info *info); int 
devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_policer_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, + struct genl_info *info); int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_linecard_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); #endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index a002f71d6068..75b744b47986 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -16,14 +16,25 @@ static const char * const devlink_op_strmap[] = { [3] = "get", [7] = "port-get", + [DEVLINK_CMD_PORT_NEW] = "port-new", [13] = "sb-get", [17] = "sb-pool-get", [21] = "sb-port-pool-get", [25] = "sb-tc-pool-bind-get", + [DEVLINK_CMD_ESWITCH_GET] = "eswitch-get", + [DEVLINK_CMD_DPIPE_TABLE_GET] = "dpipe-table-get", + [DEVLINK_CMD_DPIPE_ENTRIES_GET] = "dpipe-entries-get", + [DEVLINK_CMD_DPIPE_HEADERS_GET] = "dpipe-headers-get", + [DEVLINK_CMD_RESOURCE_DUMP] = "resource-dump", + [DEVLINK_CMD_RELOAD] = "reload", [DEVLINK_CMD_PARAM_GET] = "param-get", [DEVLINK_CMD_REGION_GET] = "region-get", + [DEVLINK_CMD_REGION_NEW] = "region-new", + [DEVLINK_CMD_REGION_READ] = "region-read", + [DEVLINK_CMD_PORT_PARAM_GET] = "port-param-get", [DEVLINK_CMD_INFO_GET] = "info-get", [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", + [DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET] = "health-reporter-dump-get", [63] = "trap-get", [67] = 
"trap-group-get", [71] = "trap-policer-get", @@ -51,7 +62,303 @@ const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value) return devlink_sb_pool_type_strmap[value]; } +static const char * const devlink_port_type_strmap[] = { + [0] = "notset", + [1] = "auto", + [2] = "eth", + [3] = "ib", +}; + +const char *devlink_port_type_str(enum devlink_port_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_type_strmap)) + return NULL; + return devlink_port_type_strmap[value]; +} + +static const char * const devlink_port_flavour_strmap[] = { + [0] = "physical", + [1] = "cpu", + [2] = "dsa", + [3] = "pci_pf", + [4] = "pci_vf", + [5] = "virtual", + [6] = "unused", + [7] = "pci_sf", +}; + +const char *devlink_port_flavour_str(enum devlink_port_flavour value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_flavour_strmap)) + return NULL; + return devlink_port_flavour_strmap[value]; +} + +static const char * const devlink_port_fn_state_strmap[] = { + [0] = "inactive", + [1] = "active", +}; + +const char *devlink_port_fn_state_str(enum devlink_port_fn_state value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_state_strmap)) + return NULL; + return devlink_port_fn_state_strmap[value]; +} + +static const char * const devlink_port_fn_opstate_strmap[] = { + [0] = "detached", + [1] = "attached", +}; + +const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_opstate_strmap)) + return NULL; + return devlink_port_fn_opstate_strmap[value]; +} + +static const char * const devlink_port_fn_attr_cap_strmap[] = { + [0] = "roce-bit", + [1] = "migratable-bit", +}; + +const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_attr_cap_strmap)) + return NULL; + return devlink_port_fn_attr_cap_strmap[value]; +} + +static const char * const devlink_sb_threshold_type_strmap[] = { + [0] = "static", + [1] = "dynamic", +}; + +const char *devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_sb_threshold_type_strmap)) + return NULL; + return devlink_sb_threshold_type_strmap[value]; +} + +static const char * const devlink_eswitch_mode_strmap[] = { + [0] = "legacy", + [1] = "switchdev", +}; + +const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_mode_strmap)) + return NULL; + return devlink_eswitch_mode_strmap[value]; +} + +static const char * const devlink_eswitch_inline_mode_strmap[] = { + [0] = "none", + [1] = "link", + [2] = "network", + [3] = "transport", +}; + +const char * +devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_inline_mode_strmap)) + return NULL; + return devlink_eswitch_inline_mode_strmap[value]; +} + +static const char * const devlink_eswitch_encap_mode_strmap[] = { + [0] = "none", + [1] = "basic", +}; + +const char * +devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_encap_mode_strmap)) + return NULL; + return devlink_eswitch_encap_mode_strmap[value]; +} + +static const char * const devlink_dpipe_match_type_strmap[] = { + [0] = "field-exact", +}; + +const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value) +{ + if (value < 
0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_match_type_strmap)) + return NULL; + return devlink_dpipe_match_type_strmap[value]; +} + +static const char * const devlink_dpipe_action_type_strmap[] = { + [0] = "field-modify", +}; + +const char *devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_action_type_strmap)) + return NULL; + return devlink_dpipe_action_type_strmap[value]; +} + +static const char * const devlink_dpipe_field_mapping_type_strmap[] = { + [0] = "none", + [1] = "ifindex", +}; + +const char * +devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_field_mapping_type_strmap)) + return NULL; + return devlink_dpipe_field_mapping_type_strmap[value]; +} + +static const char * const devlink_resource_unit_strmap[] = { + [0] = "entry", +}; + +const char *devlink_resource_unit_str(enum devlink_resource_unit value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_resource_unit_strmap)) + return NULL; + return devlink_resource_unit_strmap[value]; +} + +static const char * const devlink_reload_action_strmap[] = { + [1] = "driver-reinit", + [2] = "fw-activate", +}; + +const char *devlink_reload_action_str(enum devlink_reload_action value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_reload_action_strmap)) + return NULL; + return devlink_reload_action_strmap[value]; +} + +static const char * const devlink_param_cmode_strmap[] = { + [0] = "runtime", + [1] = "driverinit", + [2] = "permanent", +}; + +const char *devlink_param_cmode_str(enum devlink_param_cmode value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_param_cmode_strmap)) + return NULL; + return devlink_param_cmode_strmap[value]; +} + +static const char * const devlink_flash_overwrite_strmap[] = { + [0] = "settings-bit", + [1] = "identifiers-bit", +}; + +const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_flash_overwrite_strmap)) + return NULL; + return devlink_flash_overwrite_strmap[value]; +} + +static const char * const devlink_trap_action_strmap[] = { + [0] = "drop", + [1] = "trap", + [2] = "mirror", +}; + +const char *devlink_trap_action_str(enum devlink_trap_action value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_trap_action_strmap)) + return NULL; + return devlink_trap_action_strmap[value]; +} + /* Policies */ +struct ynl_policy_attr devlink_dl_dpipe_match_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_match_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_match_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_match_value_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, + [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", 
.type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_match_value_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_match_value_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_action_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_action_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_action_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_action_value_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, + [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_action_value_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_action_value_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_field_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_field_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_field_policy, +}; + +struct ynl_policy_attr devlink_dl_resource_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, +}; + +struct ynl_policy_nest devlink_dl_resource_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_resource_policy, +}; + struct ynl_policy_attr devlink_dl_info_version_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, @@ -62,6 +369,31 @@ 
struct ynl_policy_nest devlink_dl_info_version_nest = { .table = devlink_dl_info_version_policy, }; +struct ynl_policy_attr devlink_dl_fmsg_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, +}; + +struct ynl_policy_nest devlink_dl_fmsg_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_fmsg_policy, +}; + +struct ynl_policy_attr devlink_dl_port_function_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .name = "hw-addr", .type = YNL_PT_BINARY,}, + [DEVLINK_PORT_FN_ATTR_STATE] = { .name = "state", .type = YNL_PT_U8, }, + [DEVLINK_PORT_FN_ATTR_OPSTATE] = { .name = "opstate", .type = YNL_PT_U8, }, + [DEVLINK_PORT_FN_ATTR_CAPS] = { .name = "caps", .type = YNL_PT_BITFIELD32, }, +}; + +struct ynl_policy_nest devlink_dl_port_function_nest = { + .max_attr = DEVLINK_PORT_FUNCTION_ATTR_MAX, + .table = devlink_dl_port_function_policy, +}; + struct ynl_policy_attr devlink_dl_reload_stats_entry_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, @@ -81,6 +413,69 @@ struct ynl_policy_nest devlink_dl_reload_act_stats_nest = { .table = devlink_dl_reload_act_stats_policy, }; +struct ynl_policy_attr devlink_dl_selftest_id_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .name = "flash", .type = YNL_PT_FLAG, }, +}; + +struct ynl_policy_nest devlink_dl_selftest_id_nest = { + .max_attr = DEVLINK_ATTR_SELFTEST_ID_MAX, + .table = devlink_dl_selftest_id_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_table_matches_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_table_matches_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_table_matches_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_table_actions_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_table_actions_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_table_actions_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_entry_match_values_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_entry_match_values_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_entry_match_values_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_entry_action_values_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_entry_action_values_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = 
devlink_dl_dpipe_entry_action_values_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_header_fields_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_header_fields_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_header_fields_policy, +}; + +struct ynl_policy_attr devlink_dl_resource_list_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, }, +}; + +struct ynl_policy_nest devlink_dl_resource_list_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_resource_list_policy, +}; + struct ynl_policy_attr devlink_dl_reload_act_info_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, @@ -91,6 +486,45 @@ struct ynl_policy_nest devlink_dl_reload_act_info_nest = { .table = devlink_dl_reload_act_info_policy, }; +struct ynl_policy_attr devlink_dl_dpipe_table_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = "dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_table_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_table_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_entry_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = "dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_entry_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_entry_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_header_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_header_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_header_policy, 
+}; + struct ynl_policy_attr devlink_dl_reload_stats_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, }; @@ -100,6 +534,33 @@ struct ynl_policy_nest devlink_dl_reload_stats_nest = { .table = devlink_dl_reload_stats_policy, }; +struct ynl_policy_attr devlink_dl_dpipe_tables_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_tables_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_tables_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_entries_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_entries_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_entries_policy, +}; + +struct ynl_policy_attr devlink_dl_dpipe_headers_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dpipe_headers_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dpipe_headers_policy, +}; + struct ynl_policy_attr devlink_dl_dev_stats_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, @@ -114,12 +575,75 @@ struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .name = "bus-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_PORT_INDEX] = { .name = "port-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_PORT_TYPE] = { .name = "port-type", .type = YNL_PT_U16, }, + [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .name = "port-split-count", .type = YNL_PT_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .name = "sb-index", .type = YNL_PT_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .name = "sb-pool-index", .type = YNL_PT_U16, }, [DEVLINK_ATTR_SB_POOL_TYPE] = { .name = "sb-pool-type", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_SB_POOL_SIZE] = { .name = "sb-pool-size", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .name = "sb-pool-threshold-type", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .name = "sb-threshold", .type = YNL_PT_U32, }, [DEVLINK_ATTR_SB_TC_INDEX] = { .name = "sb-tc-index", .type = YNL_PT_U16, }, + [DEVLINK_ATTR_ESWITCH_MODE] = { .name = "eswitch-mode", .type = YNL_PT_U16, }, + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .name = "eswitch-inline-mode", .type = YNL_PT_U16, }, + [DEVLINK_ATTR_DPIPE_TABLES] = { .name = "dpipe-tables", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_tables_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = 
"dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_ENTRIES] = { .name = "dpipe-entries", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entries_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = "dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, + [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, + [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, + [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, + [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, + [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, + [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADERS] = { .name = "dpipe-headers", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_headers_nest, }, + [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, + [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, + [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, + [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .name = "eswitch-encap-mode", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RESOURCE_LIST] = { .name = "resource-list", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_list_nest, }, + [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, 
}, + [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_PORT_FLAVOUR] = { .name = "port-flavour", .type = YNL_PT_U16, }, [DEVLINK_ATTR_PARAM_NAME] = { .name = "param-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_PARAM_TYPE] = { .name = "param-type", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .name = "param-value-cmode", .type = YNL_PT_U8, }, [DEVLINK_ATTR_REGION_NAME] = { .name = "region-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .name = "region-snapshot-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .name = "region-chunk-addr", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .name = "region-chunk-len", .type = YNL_PT_U64, }, [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .name = "info-driver-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .name = "info-serial-number", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .name = "info-version-fixed", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, @@ -127,12 +651,35 @@ struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_INFO_VERSION_STORED] = { .name = "info-version-stored", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_FMSG] = { .name = "fmsg", .type = YNL_PT_NEST, .nest = &devlink_dl_fmsg_nest, }, + [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, + [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .name = "health-reporter-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .name = "health-reporter-graceful-period", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .name = "health-reporter-auto-recover", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .name = "flash-update-file-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .name = 
"flash-update-component", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .name = "port-pci-pf-number", .type = YNL_PT_U16, }, [DEVLINK_ATTR_TRAP_NAME] = { .name = "trap-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_TRAP_ACTION] = { .name = "trap-action", .type = YNL_PT_U8, }, [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .name = "trap-group-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_RELOAD_FAILED] = { .name = "reload-failed", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_NETNS_FD] = { .name = "netns-fd", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_NETNS_PID] = { .name = "netns-pid", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_NETNS_ID] = { .name = "netns-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .name = "health-reporter-auto-dump", .type = YNL_PT_U8, }, [DEVLINK_ATTR_TRAP_POLICER_ID] = { .name = "trap-policer-id", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .name = "trap-policer-rate", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .name = "trap-policer-burst", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_PORT_FUNCTION] = { .name = "port-function", .type = YNL_PT_NEST, .nest = &devlink_dl_port_function_nest, }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .name = "port-controller-number", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = { .name = "flash-update-overwrite-mask", .type = YNL_PT_BITFIELD32, }, [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED] = { .name = "reload-actions-performed", .type = YNL_PT_BITFIELD32, }, + [DEVLINK_ATTR_RELOAD_LIMITS] = { .name = "reload-limits", .type = YNL_PT_BITFIELD32, }, [DEVLINK_ATTR_DEV_STATS] = { .name = "dev-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_dev_stats_nest, }, [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, @@ -141,8 +688,17 @@ struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .name = "port-pci-sf-number", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .name = "rate-tx-share", .type = YNL_PT_U64, }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .name = "rate-tx-max", .type = YNL_PT_U64, }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .name = "rate-node-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .name = "rate-parent-node-name", .type = YNL_PT_NUL_STR, }, [DEVLINK_ATTR_LINECARD_INDEX] = { .name = "linecard-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .name = "linecard-type", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_SELFTESTS] = { .name = "selftests", .type = YNL_PT_NEST, .nest = &devlink_dl_selftest_id_nest, }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .name = "rate-tx-priority", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .name = "rate-tx-weight", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_REGION_DIRECT] = { .name = "region-direct", .type = YNL_PT_FLAG, }, }; struct 
ynl_policy_nest devlink_nest = { @@ -151,43 +707,44 @@ struct ynl_policy_nest devlink_nest = { }; /* Common nested types */ -void devlink_dl_info_version_free(struct devlink_dl_info_version *obj) +void devlink_dl_dpipe_match_free(struct devlink_dl_dpipe_match *obj) { - free(obj->info_version_name); - free(obj->info_version_value); } -int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) +int devlink_dl_dpipe_match_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) { - struct devlink_dl_info_version *dst = yarg->data; + struct devlink_dl_dpipe_match *dst = yarg->data; const struct nlattr *attr; mnl_attr_for_each_nested(attr, nested) { unsigned int type = mnl_attr_get_type(attr); - if (type == DEVLINK_ATTR_INFO_VERSION_NAME) { - unsigned int len; - + if (type == DEVLINK_ATTR_DPIPE_MATCH_TYPE) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_name_len = len; - dst->info_version_name = malloc(len + 1); - memcpy(dst->info_version_name, mnl_attr_get_str(attr), len); - dst->info_version_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) { - unsigned int len; - + dst->_present.dpipe_match_type = 1; + dst->dpipe_match_type = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_value_len = len; - dst->info_version_value = malloc(len + 1); - memcpy(dst->info_version_value, mnl_attr_get_str(attr), len); - dst->info_version_value[len] = 0; + dst->_present.dpipe_header_id = 1; + dst->dpipe_header_id = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_global = 1; + dst->dpipe_header_global = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_index = 1; + dst->dpipe_header_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_field_id = 1; + dst->dpipe_field_id = mnl_attr_get_u32(attr); } } @@ -195,75 +752,194 @@ int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, } void -devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj) +devlink_dl_dpipe_match_value_free(struct devlink_dl_dpipe_match_value *obj) { + unsigned int i; + + for (i = 0; i < obj->n_dpipe_match; i++) + devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); + free(obj->dpipe_match); + free(obj->dpipe_value); + free(obj->dpipe_value_mask); } -int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) +int devlink_dl_dpipe_match_value_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) { - struct devlink_dl_reload_stats_entry *dst = yarg->data; + struct devlink_dl_dpipe_match_value *dst = yarg->data; + unsigned int n_dpipe_match = 0; const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_match) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-match-value.dpipe-match)"); mnl_attr_for_each_nested(attr, nested) { unsigned int type = mnl_attr_get_type(attr); - if (type == 
DEVLINK_ATTR_RELOAD_STATS_LIMIT) { + if (type == DEVLINK_ATTR_DPIPE_MATCH) { + n_dpipe_match++; + } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.reload_stats_limit = 1; - dst->reload_stats_limit = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) { + + len = mnl_attr_get_payload_len(attr); + dst->_present.dpipe_value_len = len; + dst->dpipe_value = malloc(len); + memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); + } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.reload_stats_value = 1; - dst->reload_stats_value = mnl_attr_get_u32(attr); + + len = mnl_attr_get_payload_len(attr); + dst->_present.dpipe_value_mask_len = len; + dst->dpipe_value_mask = malloc(len); + memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); + } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_value_mapping = 1; + dst->dpipe_value_mapping = mnl_attr_get_u32(attr); + } + } + + if (n_dpipe_match) { + dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); + dst->n_dpipe_match = n_dpipe_match; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_match_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { + parg.data = &dst->dpipe_match[i]; + if (devlink_dl_dpipe_match_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } } } return 0; } -void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj) +void devlink_dl_dpipe_action_free(struct devlink_dl_dpipe_action *obj) +{ +} + +int devlink_dl_dpipe_action_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_action *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_ACTION_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_action_type = 1; + dst->dpipe_action_type = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_id = 1; + dst->dpipe_header_id = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_global = 1; + dst->dpipe_header_global = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_index = 1; + dst->dpipe_header_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_field_id = 1; + dst->dpipe_field_id = mnl_attr_get_u32(attr); + } + } + + return 0; +} + +void +devlink_dl_dpipe_action_value_free(struct devlink_dl_dpipe_action_value *obj) { unsigned int i; - for (i = 0; i < obj->n_reload_stats_entry; i++) - devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]); - free(obj->reload_stats_entry); + for (i = 0; i < obj->n_dpipe_action; i++) + devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); + free(obj->dpipe_action); + free(obj->dpipe_value); + free(obj->dpipe_value_mask); } -int 
devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) +int devlink_dl_dpipe_action_value_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) { - struct devlink_dl_reload_act_stats *dst = yarg->data; - unsigned int n_reload_stats_entry = 0; + struct devlink_dl_dpipe_action_value *dst = yarg->data; + unsigned int n_dpipe_action = 0; const struct nlattr *attr; struct ynl_parse_arg parg; int i; parg.ys = yarg->ys; - if (dst->reload_stats_entry) - return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)"); + if (dst->dpipe_action) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-action-value.dpipe-action)"); mnl_attr_for_each_nested(attr, nested) { unsigned int type = mnl_attr_get_type(attr); - if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - n_reload_stats_entry++; + if (type == DEVLINK_ATTR_DPIPE_ACTION) { + n_dpipe_action++; + } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.dpipe_value_len = len; + dst->dpipe_value = malloc(len); + memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); + } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.dpipe_value_mask_len = len; + dst->dpipe_value_mask = malloc(len); + memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); + } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_value_mapping = 1; + dst->dpipe_value_mapping = mnl_attr_get_u32(attr); } } - if (n_reload_stats_entry) { - dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry)); - dst->n_reload_stats_entry = n_reload_stats_entry; + if (n_dpipe_action) { + dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); + dst->n_dpipe_action = n_dpipe_action; i = 0; - parg.rsp_policy = &devlink_dl_reload_stats_entry_nest; + parg.rsp_policy = &devlink_dl_dpipe_action_nest; mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - parg.data = &dst->reload_stats_entry[i]; - if (devlink_dl_reload_stats_entry_parse(&parg, attr)) + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { + parg.data = &dst->dpipe_action[i]; + if (devlink_dl_dpipe_action_parse(&parg, attr)) return MNL_CB_ERROR; i++; } @@ -273,172 +949,2645 @@ int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, return 0; } -void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj) +void devlink_dl_dpipe_field_free(struct devlink_dl_dpipe_field *obj) { - unsigned int i; - - for (i = 0; i < obj->n_reload_action_stats; i++) - devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]); - free(obj->reload_action_stats); + free(obj->dpipe_field_name); } -int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) +int devlink_dl_dpipe_field_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) { - struct devlink_dl_reload_act_info *dst = yarg->data; - unsigned int n_reload_action_stats = 0; + struct devlink_dl_dpipe_field *dst = yarg->data; const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_action_stats) - return 
ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)"); mnl_attr_for_each_nested(attr, nested) { unsigned int type = mnl_attr_get_type(attr); - if (type == DEVLINK_ATTR_RELOAD_ACTION) { + if (type == DEVLINK_ATTR_DPIPE_FIELD_NAME) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.reload_action = 1; - dst->reload_action = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - n_reload_action_stats++; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dpipe_field_name_len = len; + dst->dpipe_field_name = malloc(len + 1); + memcpy(dst->dpipe_field_name, mnl_attr_get_str(attr), len); + dst->dpipe_field_name[len] = 0; + } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_field_id = 1; + dst->dpipe_field_id = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_field_bitwidth = 1; + dst->dpipe_field_bitwidth = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_field_mapping_type = 1; + dst->dpipe_field_mapping_type = mnl_attr_get_u32(attr); } } - if (n_reload_action_stats) { - dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats)); - dst->n_reload_action_stats = n_reload_action_stats; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_stats_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - parg.data = &dst->reload_action_stats[i]; - if (devlink_dl_reload_act_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } + return 0; +} + +void devlink_dl_resource_free(struct devlink_dl_resource *obj) +{ + free(obj->resource_name); +} + +int devlink_dl_resource_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_resource *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RESOURCE_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.resource_name_len = len; + dst->resource_name = malloc(len + 1); + memcpy(dst->resource_name, mnl_attr_get_str(attr), len); + dst->resource_name[len] = 0; + } else if (type == DEVLINK_ATTR_RESOURCE_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_id = 1; + dst->resource_id = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_size = 1; + dst->resource_size = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_NEW) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_size_new = 1; + dst->resource_size_new = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_VALID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_size_valid = 1; + dst->resource_size_valid = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MIN) { + if (ynl_attr_validate(yarg, attr)) + return 
MNL_CB_ERROR; + dst->_present.resource_size_min = 1; + dst->resource_size_min = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MAX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_size_max = 1; + dst->resource_size_max = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_GRAN) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_size_gran = 1; + dst->resource_size_gran = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_UNIT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_unit = 1; + dst->resource_unit = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RESOURCE_OCC) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.resource_occ = 1; + dst->resource_occ = mnl_attr_get_u64(attr); } } return 0; } -void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj) +void devlink_dl_info_version_free(struct devlink_dl_info_version *obj) { - unsigned int i; - - for (i = 0; i < obj->n_reload_action_info; i++) - devlink_dl_reload_act_info_free(&obj->reload_action_info[i]); - free(obj->reload_action_info); + free(obj->info_version_name); + free(obj->info_version_value); } -int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg, +int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, const struct nlattr *nested) { - struct devlink_dl_reload_stats *dst = yarg->data; - unsigned int n_reload_action_info = 0; + struct devlink_dl_info_version *dst = yarg->data; const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - parg.ys = yarg->ys; + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); - if (dst->reload_action_info) - return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)"); + if (type == DEVLINK_ATTR_INFO_VERSION_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_version_name_len = len; + dst->info_version_name = malloc(len + 1); + memcpy(dst->info_version_name, mnl_attr_get_str(attr), len); + dst->info_version_name[len] = 0; + } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_version_value_len = len; + dst->info_version_value = malloc(len + 1); + memcpy(dst->info_version_value, mnl_attr_get_str(attr), len); + dst->info_version_value[len] = 0; + } + } + + return 0; +} + +void devlink_dl_fmsg_free(struct devlink_dl_fmsg *obj) +{ + free(obj->fmsg_obj_name); +} + +int devlink_dl_fmsg_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_fmsg *dst = yarg->data; + const struct nlattr *attr; mnl_attr_for_each_nested(attr, nested) { unsigned int type = mnl_attr_get_type(attr); - if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - n_reload_action_info++; + if (type == DEVLINK_ATTR_FMSG_OBJ_NEST_START) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.fmsg_obj_nest_start = 1; + } else if (type == DEVLINK_ATTR_FMSG_PAIR_NEST_START) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.fmsg_pair_nest_start = 1; + } else if (type == DEVLINK_ATTR_FMSG_ARR_NEST_START) { + if (ynl_attr_validate(yarg, attr)) + return 
MNL_CB_ERROR; + dst->_present.fmsg_arr_nest_start = 1; + } else if (type == DEVLINK_ATTR_FMSG_NEST_END) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.fmsg_nest_end = 1; + } else if (type == DEVLINK_ATTR_FMSG_OBJ_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.fmsg_obj_name_len = len; + dst->fmsg_obj_name = malloc(len + 1); + memcpy(dst->fmsg_obj_name, mnl_attr_get_str(attr), len); + dst->fmsg_obj_name[len] = 0; } } - if (n_reload_action_info) { - dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info)); - dst->n_reload_action_info = n_reload_action_info; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_info_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - parg.data = &dst->reload_action_info[i]; - if (devlink_dl_reload_act_info_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } + return 0; +} + +void devlink_dl_port_function_free(struct devlink_dl_port_function *obj) +{ + free(obj->hw_addr); +} + +int devlink_dl_port_function_put(struct nlmsghdr *nlh, unsigned int attr_type, + struct devlink_dl_port_function *obj) +{ + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, attr_type); + if (obj->_present.hw_addr_len) + mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, obj->_present.hw_addr_len, obj->hw_addr); + if (obj->_present.state) + mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_STATE, obj->state); + if (obj->_present.opstate) + mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_OPSTATE, obj->opstate); + if (obj->_present.caps) + mnl_attr_put(nlh, DEVLINK_PORT_FN_ATTR_CAPS, sizeof(struct nla_bitfield32), &obj->caps); + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +void +devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj) +{ +} + +int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_stats_entry *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS_LIMIT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats_limit = 1; + dst->reload_stats_limit = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats_value = 1; + dst->reload_stats_value = mnl_attr_get_u32(attr); + } + } + + return 0; +} + +void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_stats_entry; i++) + devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]); + free(obj->reload_stats_entry); +} + +int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_act_stats *dst = yarg->data; + unsigned int n_reload_stats_entry = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_stats_entry) + return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { + n_reload_stats_entry++; + } + } + + if 
(n_reload_stats_entry) { + dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry)); + dst->n_reload_stats_entry = n_reload_stats_entry; + i = 0; + parg.rsp_policy = &devlink_dl_reload_stats_entry_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { + parg.data = &dst->reload_stats_entry[i]; + if (devlink_dl_reload_stats_entry_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_selftest_id_free(struct devlink_dl_selftest_id *obj) +{ +} + +int devlink_dl_selftest_id_put(struct nlmsghdr *nlh, unsigned int attr_type, + struct devlink_dl_selftest_id *obj) +{ + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, attr_type); + if (obj->_present.flash) + mnl_attr_put(nlh, DEVLINK_ATTR_SELFTEST_ID_FLASH, 0, NULL); + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +void +devlink_dl_dpipe_table_matches_free(struct devlink_dl_dpipe_table_matches *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_match; i++) + devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); + free(obj->dpipe_match); +} + +int devlink_dl_dpipe_table_matches_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_table_matches *dst = yarg->data; + unsigned int n_dpipe_match = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_match) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-matches.dpipe-match)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_MATCH) { + n_dpipe_match++; + } + } + + if (n_dpipe_match) { + dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); + dst->n_dpipe_match = n_dpipe_match; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_match_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { + parg.data = &dst->dpipe_match[i]; + if (devlink_dl_dpipe_match_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void +devlink_dl_dpipe_table_actions_free(struct devlink_dl_dpipe_table_actions *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_action; i++) + devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); + free(obj->dpipe_action); +} + +int devlink_dl_dpipe_table_actions_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_table_actions *dst = yarg->data; + unsigned int n_dpipe_action = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_action) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-actions.dpipe-action)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_ACTION) { + n_dpipe_action++; + } + } + + if (n_dpipe_action) { + dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); + dst->n_dpipe_action = n_dpipe_action; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_action_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { + parg.data = &dst->dpipe_action[i]; + if (devlink_dl_dpipe_action_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void +devlink_dl_dpipe_entry_match_values_free(struct devlink_dl_dpipe_entry_match_values *obj) +{ + unsigned 
int i; + + for (i = 0; i < obj->n_dpipe_match_value; i++) + devlink_dl_dpipe_match_value_free(&obj->dpipe_match_value[i]); + free(obj->dpipe_match_value); +} + +int devlink_dl_dpipe_entry_match_values_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_entry_match_values *dst = yarg->data; + unsigned int n_dpipe_match_value = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_match_value) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-match-values.dpipe-match-value)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { + n_dpipe_match_value++; + } + } + + if (n_dpipe_match_value) { + dst->dpipe_match_value = calloc(n_dpipe_match_value, sizeof(*dst->dpipe_match_value)); + dst->n_dpipe_match_value = n_dpipe_match_value; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_match_value_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { + parg.data = &dst->dpipe_match_value[i]; + if (devlink_dl_dpipe_match_value_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void +devlink_dl_dpipe_entry_action_values_free(struct devlink_dl_dpipe_entry_action_values *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_action_value; i++) + devlink_dl_dpipe_action_value_free(&obj->dpipe_action_value[i]); + free(obj->dpipe_action_value); +} + +int devlink_dl_dpipe_entry_action_values_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_entry_action_values *dst = yarg->data; + unsigned int n_dpipe_action_value = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_action_value) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-action-values.dpipe-action-value)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { + n_dpipe_action_value++; + } + } + + if (n_dpipe_action_value) { + dst->dpipe_action_value = calloc(n_dpipe_action_value, sizeof(*dst->dpipe_action_value)); + dst->n_dpipe_action_value = n_dpipe_action_value; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_action_value_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { + parg.data = &dst->dpipe_action_value[i]; + if (devlink_dl_dpipe_action_value_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void +devlink_dl_dpipe_header_fields_free(struct devlink_dl_dpipe_header_fields *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_field; i++) + devlink_dl_dpipe_field_free(&obj->dpipe_field[i]); + free(obj->dpipe_field); +} + +int devlink_dl_dpipe_header_fields_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_header_fields *dst = yarg->data; + unsigned int n_dpipe_field = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_field) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-header-fields.dpipe-field)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_FIELD) { + n_dpipe_field++; + } + } + + if (n_dpipe_field) { + 
dst->dpipe_field = calloc(n_dpipe_field, sizeof(*dst->dpipe_field)); + dst->n_dpipe_field = n_dpipe_field; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_field_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_FIELD) { + parg.data = &dst->dpipe_field[i]; + if (devlink_dl_dpipe_field_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_resource_list_free(struct devlink_dl_resource_list *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_resource; i++) + devlink_dl_resource_free(&obj->resource[i]); + free(obj->resource); +} + +int devlink_dl_resource_list_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_resource_list *dst = yarg->data; + unsigned int n_resource = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->resource) + return ynl_error_parse(yarg, "attribute already present (dl-resource-list.resource)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RESOURCE) { + n_resource++; + } + } + + if (n_resource) { + dst->resource = calloc(n_resource, sizeof(*dst->resource)); + dst->n_resource = n_resource; + i = 0; + parg.rsp_policy = &devlink_dl_resource_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RESOURCE) { + parg.data = &dst->resource[i]; + if (devlink_dl_resource_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_action_stats; i++) + devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]); + free(obj->reload_action_stats); +} + +int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_act_info *dst = yarg->data; + unsigned int n_reload_action_stats = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_action_stats) + return ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_ACTION) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_action = 1; + dst->reload_action = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) { + n_reload_action_stats++; + } + } + + if (n_reload_action_stats) { + dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats)); + dst->n_reload_action_stats = n_reload_action_stats; + i = 0; + parg.rsp_policy = &devlink_dl_reload_act_stats_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) { + parg.data = &dst->reload_action_stats[i]; + if (devlink_dl_reload_act_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dpipe_table_free(struct devlink_dl_dpipe_table *obj) +{ + free(obj->dpipe_table_name); + devlink_dl_dpipe_table_matches_free(&obj->dpipe_table_matches); + devlink_dl_dpipe_table_actions_free(&obj->dpipe_table_actions); +} + +int devlink_dl_dpipe_table_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_table *dst = yarg->data; + const struct 
nlattr *attr; + struct ynl_parse_arg parg; + + parg.ys = yarg->ys; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_TABLE_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dpipe_table_name_len = len; + dst->dpipe_table_name = malloc(len + 1); + memcpy(dst->dpipe_table_name, mnl_attr_get_str(attr), len); + dst->dpipe_table_name[len] = 0; + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_SIZE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_size = 1; + dst->dpipe_table_size = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_MATCHES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_matches = 1; + + parg.rsp_policy = &devlink_dl_dpipe_table_matches_nest; + parg.data = &dst->dpipe_table_matches; + if (devlink_dl_dpipe_table_matches_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_ACTIONS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_actions = 1; + + parg.rsp_policy = &devlink_dl_dpipe_table_actions_nest; + parg.data = &dst->dpipe_table_actions; + if (devlink_dl_dpipe_table_actions_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_counters_enabled = 1; + dst->dpipe_table_counters_enabled = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_resource_id = 1; + dst->dpipe_table_resource_id = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_table_resource_units = 1; + dst->dpipe_table_resource_units = mnl_attr_get_u64(attr); + } + } + + return 0; +} + +void devlink_dl_dpipe_entry_free(struct devlink_dl_dpipe_entry *obj) +{ + devlink_dl_dpipe_entry_match_values_free(&obj->dpipe_entry_match_values); + devlink_dl_dpipe_entry_action_values_free(&obj->dpipe_entry_action_values); +} + +int devlink_dl_dpipe_entry_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_entry *dst = yarg->data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + parg.ys = yarg->ys; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_ENTRY_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_entry_index = 1; + dst->dpipe_entry_index = mnl_attr_get_u64(attr); + } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_entry_match_values = 1; + + parg.rsp_policy = &devlink_dl_dpipe_entry_match_values_nest; + parg.data = &dst->dpipe_entry_match_values; + if (devlink_dl_dpipe_entry_match_values_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_entry_action_values = 1; + + parg.rsp_policy = &devlink_dl_dpipe_entry_action_values_nest; + parg.data = &dst->dpipe_entry_action_values; + if 
(devlink_dl_dpipe_entry_action_values_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_COUNTER) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_entry_counter = 1; + dst->dpipe_entry_counter = mnl_attr_get_u64(attr); + } + } + + return 0; +} + +void devlink_dl_dpipe_header_free(struct devlink_dl_dpipe_header *obj) +{ + free(obj->dpipe_header_name); + devlink_dl_dpipe_header_fields_free(&obj->dpipe_header_fields); +} + +int devlink_dl_dpipe_header_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_header *dst = yarg->data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + parg.ys = yarg->ys; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_HEADER_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dpipe_header_name_len = len; + dst->dpipe_header_name = malloc(len + 1); + memcpy(dst->dpipe_header_name, mnl_attr_get_str(attr), len); + dst->dpipe_header_name[len] = 0; + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_id = 1; + dst->dpipe_header_id = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_global = 1; + dst->dpipe_header_global = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DPIPE_HEADER_FIELDS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_header_fields = 1; + + parg.rsp_policy = &devlink_dl_dpipe_header_fields_nest; + parg.data = &dst->dpipe_header_fields; + if (devlink_dl_dpipe_header_fields_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return 0; +} + +void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_action_info; i++) + devlink_dl_reload_act_info_free(&obj->reload_action_info[i]); + free(obj->reload_action_info); +} + +int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_stats *dst = yarg->data; + unsigned int n_reload_action_info = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_action_info) + return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) { + n_reload_action_info++; + } + } + + if (n_reload_action_info) { + dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info)); + dst->n_reload_action_info = n_reload_action_info; + i = 0; + parg.rsp_policy = &devlink_dl_reload_act_info_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) { + parg.data = &dst->reload_action_info[i]; + if (devlink_dl_reload_act_info_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dpipe_tables_free(struct devlink_dl_dpipe_tables *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_table; i++) + devlink_dl_dpipe_table_free(&obj->dpipe_table[i]); + free(obj->dpipe_table); +} + 
+int devlink_dl_dpipe_tables_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_tables *dst = yarg->data; + unsigned int n_dpipe_table = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_table) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-tables.dpipe-table)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_TABLE) { + n_dpipe_table++; + } + } + + if (n_dpipe_table) { + dst->dpipe_table = calloc(n_dpipe_table, sizeof(*dst->dpipe_table)); + dst->n_dpipe_table = n_dpipe_table; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_table_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_TABLE) { + parg.data = &dst->dpipe_table[i]; + if (devlink_dl_dpipe_table_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dpipe_entries_free(struct devlink_dl_dpipe_entries *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_entry; i++) + devlink_dl_dpipe_entry_free(&obj->dpipe_entry[i]); + free(obj->dpipe_entry); +} + +int devlink_dl_dpipe_entries_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_entries *dst = yarg->data; + unsigned int n_dpipe_entry = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_entry) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entries.dpipe-entry)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_ENTRY) { + n_dpipe_entry++; + } + } + + if (n_dpipe_entry) { + dst->dpipe_entry = calloc(n_dpipe_entry, sizeof(*dst->dpipe_entry)); + dst->n_dpipe_entry = n_dpipe_entry; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_entry_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ENTRY) { + parg.data = &dst->dpipe_entry[i]; + if (devlink_dl_dpipe_entry_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dpipe_headers_free(struct devlink_dl_dpipe_headers *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_dpipe_header; i++) + devlink_dl_dpipe_header_free(&obj->dpipe_header[i]); + free(obj->dpipe_header); +} + +int devlink_dl_dpipe_headers_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dpipe_headers *dst = yarg->data; + unsigned int n_dpipe_header = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->dpipe_header) + return ynl_error_parse(yarg, "attribute already present (dl-dpipe-headers.dpipe-header)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_DPIPE_HEADER) { + n_dpipe_header++; + } + } + + if (n_dpipe_header) { + dst->dpipe_header = calloc(n_dpipe_header, sizeof(*dst->dpipe_header)); + dst->n_dpipe_header = n_dpipe_header; + i = 0; + parg.rsp_policy = &devlink_dl_dpipe_header_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_HEADER) { + parg.data = &dst->dpipe_header[i]; + if (devlink_dl_dpipe_header_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj) +{ + 
devlink_dl_reload_stats_free(&obj->reload_stats); + devlink_dl_reload_stats_free(&obj->remote_reload_stats); +} + +int devlink_dl_dev_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dev_stats *dst = yarg->data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + parg.ys = yarg->ys; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats = 1; + + parg.rsp_policy = &devlink_dl_reload_stats_nest; + parg.data = &dst->reload_stats; + if (devlink_dl_reload_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.remote_reload_stats = 1; + + parg.rsp_policy = &devlink_dl_reload_stats_nest; + parg.data = &dst->remote_reload_stats; + if (devlink_dl_reload_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return 0; +} + +/* ============== DEVLINK_CMD_GET ============== */ +/* DEVLINK_CMD_GET - do */ +void devlink_get_req_free(struct devlink_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_get_rsp_free(struct devlink_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + devlink_dl_dev_stats_free(&rsp->dev_stats); + free(rsp); +} + +int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct devlink_get_rsp *dst; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_RELOAD_FAILED) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_failed = 1; + dst->reload_failed = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DEV_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dev_stats = 1; + + parg.rsp_policy = &devlink_dl_dev_stats_nest; + parg.data = &dst->dev_stats; + if (devlink_dl_dev_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return MNL_CB_OK; +} + +struct devlink_get_rsp * +devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, 
DEVLINK_ATTR_DEV_NAME, req->dev_name); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_get_rsp_parse; + yrs.rsp_cmd = 3; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_GET - dump */ +void devlink_get_list_free(struct devlink_get_list *rsp) +{ + struct devlink_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + devlink_dl_dev_stats_free(&rsp->obj.dev_stats); + free(rsp); + } +} + +struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_get_list); + yds.cb = devlink_get_rsp_parse; + yds.rsp_cmd = 3; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_PORT_GET ============== */ +/* DEVLINK_CMD_PORT_GET - do */ +void devlink_port_get_req_free(struct devlink_port_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_port_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct devlink_port_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_port_get_rsp * +devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_port_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_port_get_rsp_parse; + yrs.rsp_cmd = 7; + + err = 
ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_port_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_PORT_GET - dump */ +int devlink_port_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_port_get_rsp_dump *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp) +{ + struct devlink_port_get_rsp_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp); + } +} + +struct devlink_port_get_rsp_list * +devlink_port_get_dump(struct ynl_sock *ys, + struct devlink_port_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_port_get_rsp_list); + yds.cb = devlink_port_get_rsp_dump_parse; + yds.rsp_cmd = 7; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_port_get_rsp_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_PORT_SET ============== */ +/* DEVLINK_CMD_PORT_SET - do */ +void devlink_port_set_req_free(struct devlink_port_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + devlink_dl_port_function_free(&req->port_function); + free(req); +} + +int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.port_type) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_TYPE, req->port_type); + if (req->_present.port_function) + devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, 
&req->port_function); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_PORT_NEW ============== */ +/* DEVLINK_CMD_PORT_NEW - do */ +void devlink_port_new_req_free(struct devlink_port_new_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_port_new_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct devlink_port_new_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_port_new_rsp * +devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_port_new_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_NEW, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.port_flavour) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_FLAVOUR, req->port_flavour); + if (req->_present.port_pci_pf_number) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, req->port_pci_pf_number); + if (req->_present.port_pci_sf_number) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, req->port_pci_sf_number); + if (req->_present.port_controller_number) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, req->port_controller_number); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_port_new_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_PORT_NEW; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_port_new_rsp_free(rsp); + return NULL; +} + +/* ============== DEVLINK_CMD_PORT_DEL ============== */ +/* DEVLINK_CMD_PORT_DEL - do */ +void devlink_port_del_req_free(struct devlink_port_del_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_DEL, 
1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ +/* DEVLINK_CMD_PORT_SPLIT - do */ +void devlink_port_split_req_free(struct devlink_port_split_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SPLIT, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.port_split_count) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ +/* DEVLINK_CMD_PORT_UNSPLIT - do */ +void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_port_unsplit(struct ynl_sock *ys, + struct devlink_port_unsplit_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_UNSPLIT, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_SB_GET ============== */ +/* DEVLINK_CMD_SB_GET - do */ +void devlink_sb_get_req_free(struct devlink_sb_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_sb_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct devlink_sb_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), 
len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_SB_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_index = 1; + dst->sb_index = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_sb_get_rsp * +devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_sb_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_sb_get_rsp_parse; + yrs.rsp_cmd = 13; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_sb_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_SB_GET - dump */ +void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp) +{ + struct devlink_sb_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp); + } +} + +struct devlink_sb_get_list * +devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_sb_get_list); + yds.cb = devlink_sb_get_rsp_parse; + yds.rsp_cmd = 13; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_sb_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ +/* DEVLINK_CMD_SB_POOL_GET - do */ +void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_sb_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_sb_pool_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len 
= len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_SB_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_index = 1; + dst->sb_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_pool_index = 1; + dst->sb_pool_index = mnl_attr_get_u16(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_sb_pool_get_rsp * +devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_sb_pool_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.sb_pool_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_sb_pool_get_rsp_parse; + yrs.rsp_cmd = 17; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_sb_pool_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_SB_POOL_GET - dump */ +void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp) +{ + struct devlink_sb_pool_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp); + } +} + +struct devlink_sb_pool_get_list * +devlink_sb_pool_get_dump(struct ynl_sock *ys, + struct devlink_sb_pool_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); + yds.cb = devlink_sb_pool_get_rsp_parse; + yds.rsp_cmd = 17; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_sb_pool_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ +/* DEVLINK_CMD_SB_POOL_SET - do */ +void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_sb_pool_set(struct ynl_sock *ys, + struct devlink_sb_pool_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if 
(req->_present.sb_pool_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + if (req->_present.sb_pool_threshold_type) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, req->sb_pool_threshold_type); + if (req->_present.sb_pool_size) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ +/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ +void +devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void +devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_sb_port_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_sb_port_pool_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_SB_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_index = 1; + dst->sb_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_pool_index = 1; + dst->sb_pool_index = mnl_attr_get_u16(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_sb_port_pool_get_rsp * +devlink_sb_port_pool_get(struct ynl_sock *ys, + struct devlink_sb_port_pool_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_sb_port_pool_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.sb_pool_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_sb_port_pool_get_rsp_parse; + yrs.rsp_cmd = 21; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto 
err_free; + + return rsp; + +err_free: + devlink_sb_port_pool_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ +void +devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp) +{ + struct devlink_sb_port_pool_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp); + } +} + +struct devlink_sb_port_pool_get_list * +devlink_sb_port_pool_get_dump(struct ynl_sock *ys, + struct devlink_sb_port_pool_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); + yds.cb = devlink_sb_port_pool_get_rsp_parse; + yds.rsp_cmd = 21; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_sb_port_pool_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ +/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ +void +devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_sb_port_pool_set(struct ynl_sock *ys, + struct devlink_sb_port_pool_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.sb_pool_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + if (req->_present.sb_threshold) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ +/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ +void +devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void +devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_sb_tc_pool_bind_get_rsp_parse(const struct nlmsghdr *nlh, + void *data) +{ + struct devlink_sb_tc_pool_bind_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + 
memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_SB_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_index = 1; + dst->sb_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_SB_POOL_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_pool_type = 1; + dst->sb_pool_type = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_SB_TC_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sb_tc_index = 1; + dst->sb_tc_index = mnl_attr_get_u16(attr); + } + } + + return MNL_CB_OK; +} + +struct devlink_sb_tc_pool_bind_get_rsp * +devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_sb_tc_pool_bind_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.sb_pool_type) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); + if (req->_present.sb_tc_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; + yrs.rsp_cmd = 25; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_sb_tc_pool_bind_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ +void +devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp) +{ + struct devlink_sb_tc_pool_bind_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp); + } +} + +struct devlink_sb_tc_pool_bind_get_list * +devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); + yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; + yds.rsp_cmd = 25; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + 
mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_sb_tc_pool_bind_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ +/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ +void +devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.sb_pool_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + if (req->_present.sb_pool_type) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); + if (req->_present.sb_tc_index) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); + if (req->_present.sb_threshold) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ +/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ +void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_sb_occ_snapshot(struct ynl_sock *ys, + struct devlink_sb_occ_snapshot_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_SNAPSHOT, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ +/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ +void +devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_sb_occ_max_clear(struct ynl_sock *ys, + struct devlink_sb_occ_max_clear_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_MAX_CLEAR, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.sb_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ +/* DEVLINK_CMD_ESWITCH_GET - do */ +void devlink_eswitch_get_req_free(struct 
devlink_eswitch_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp); +} + +int devlink_eswitch_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_eswitch_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_ESWITCH_MODE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.eswitch_mode = 1; + dst->eswitch_mode = mnl_attr_get_u16(attr); + } else if (type == DEVLINK_ATTR_ESWITCH_INLINE_MODE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.eswitch_inline_mode = 1; + dst->eswitch_inline_mode = mnl_attr_get_u16(attr); + } else if (type == DEVLINK_ATTR_ESWITCH_ENCAP_MODE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.eswitch_encap_mode = 1; + dst->eswitch_encap_mode = mnl_attr_get_u8(attr); } } - return 0; + return MNL_CB_OK; } -void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj) +struct devlink_eswitch_get_rsp * +devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req) { - devlink_dl_reload_stats_free(&obj->reload_stats); - devlink_dl_reload_stats_free(&obj->remote_reload_stats); -} + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_eswitch_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; -int devlink_dl_dev_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dev_stats *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; - parg.ys = yarg->ys; + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_eswitch_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_ESWITCH_GET; - if (type == DEVLINK_ATTR_RELOAD_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats = 1; + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) { - if 
(ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.remote_reload_stats = 1; + return rsp; - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->remote_reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } +err_free: + devlink_eswitch_get_rsp_free(rsp); + return NULL; +} + +/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ +/* DEVLINK_CMD_ESWITCH_SET - do */ +void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_eswitch_set(struct ynl_sock *ys, + struct devlink_eswitch_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.eswitch_mode) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_MODE, req->eswitch_mode); + if (req->_present.eswitch_inline_mode) + mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_INLINE_MODE, req->eswitch_inline_mode); + if (req->_present.eswitch_encap_mode) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; return 0; } -/* ============== DEVLINK_CMD_GET ============== */ -/* DEVLINK_CMD_GET - do */ -void devlink_get_req_free(struct devlink_get_req *req) +/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ +/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ +void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req) { free(req->bus_name); free(req->dev_name); + free(req->dpipe_table_name); free(req); } -void devlink_get_rsp_free(struct devlink_get_rsp *rsp) +void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); - devlink_dl_dev_stats_free(&rsp->dev_stats); + devlink_dl_dpipe_tables_free(&rsp->dpipe_tables); free(rsp); } -int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_dpipe_table_get_rsp_parse(const struct nlmsghdr *nlh, void *data) { + struct devlink_dpipe_table_get_rsp *dst; struct ynl_parse_arg *yarg = data; - struct devlink_get_rsp *dst; const struct nlattr *attr; struct ynl_parse_arg parg; @@ -470,19 +3619,14 @@ int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RELOAD_FAILED) { + } else if (type == DEVLINK_ATTR_DPIPE_TABLES) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.reload_failed = 1; - dst->reload_failed = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DEV_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dev_stats = 1; + dst->_present.dpipe_tables = 1; - parg.rsp_policy = &devlink_dl_dev_stats_nest; - parg.data = &dst->dev_stats; - if (devlink_dl_dev_stats_parse(&parg, attr)) + parg.rsp_policy = &devlink_dl_dpipe_tables_nest; + parg.data = &dst->dpipe_tables; + if (devlink_dl_dpipe_tables_parse(&parg, attr)) return MNL_CB_ERROR; } } @@ -490,15 +3634,16 @@ int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } -struct devlink_get_rsp * -devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) 
+struct devlink_dpipe_table_get_rsp * +devlink_dpipe_table_get(struct ynl_sock *ys, + struct devlink_dpipe_table_get_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_get_rsp *rsp; + struct devlink_dpipe_table_get_rsp *rsp; struct nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_GET, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -506,11 +3651,13 @@ devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.dpipe_table_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_get_rsp_parse; - yrs.rsp_cmd = 3; + yrs.cb = devlink_dpipe_table_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_DPIPE_TABLE_GET; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -519,74 +3666,144 @@ devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) return rsp; err_free: - devlink_get_rsp_free(rsp); + devlink_dpipe_table_get_rsp_free(rsp); return NULL; } -/* DEVLINK_CMD_GET - dump */ -void devlink_get_list_free(struct devlink_get_list *rsp) +/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ +/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ +void +devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req) { - struct devlink_get_list *next = rsp; + free(req->bus_name); + free(req->dev_name); + free(req->dpipe_table_name); + free(req); +} - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; +void +devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + devlink_dl_dpipe_entries_free(&rsp->dpipe_entries); + free(rsp); +} - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - devlink_dl_dev_stats_free(&rsp->obj.dev_stats); - free(rsp); +int devlink_dpipe_entries_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_dpipe_entries_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_DPIPE_ENTRIES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dpipe_entries = 1; + + parg.rsp_policy = &devlink_dl_dpipe_entries_nest; + parg.data = &dst->dpipe_entries; + if (devlink_dl_dpipe_entries_parse(&parg, attr)) + return MNL_CB_ERROR; + } } + + return MNL_CB_OK; } -struct 
devlink_get_list *devlink_get_dump(struct ynl_sock *ys) +struct devlink_dpipe_entries_get_rsp * +devlink_dpipe_entries_get(struct ynl_sock *ys, + struct devlink_dpipe_entries_get_req *req) { - struct ynl_dump_state yds = {}; + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_dpipe_entries_get_rsp *rsp; struct nlmsghdr *nlh; int err; - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_get_list); - yds.cb = devlink_get_rsp_parse; - yds.rsp_cmd = 3; - yds.rsp_policy = &devlink_nest; + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_ENTRIES_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1); + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.dpipe_table_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - err = ynl_exec_dump(ys, nlh, &yds); + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_dpipe_entries_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET; + + err = ynl_exec(ys, nlh, &yrs); if (err < 0) - goto free_list; + goto err_free; - return yds.first; + return rsp; -free_list: - devlink_get_list_free(yds.first); +err_free: + devlink_dpipe_entries_get_rsp_free(rsp); return NULL; } -/* ============== DEVLINK_CMD_PORT_GET ============== */ -/* DEVLINK_CMD_PORT_GET - do */ -void devlink_port_get_req_free(struct devlink_port_get_req *req) +/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ +/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ +void +devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req) { free(req->bus_name); free(req->dev_name); free(req); } -void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp) +void +devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); + devlink_dl_dpipe_headers_free(&rsp->dpipe_headers); free(rsp); } -int devlink_port_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_dpipe_headers_get_rsp_parse(const struct nlmsghdr *nlh, void *data) { + struct devlink_dpipe_headers_get_rsp *dst; struct ynl_parse_arg *yarg = data; - struct devlink_port_get_rsp *dst; const struct nlattr *attr; + struct ynl_parse_arg parg; dst = yarg->data; + parg.ys = yarg->ys; mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { unsigned int type = mnl_attr_get_type(attr); @@ -613,26 +3830,31 @@ int devlink_port_get_rsp_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { + } else if (type == DEVLINK_ATTR_DPIPE_HEADERS) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); + dst->_present.dpipe_headers = 1; + + parg.rsp_policy = &devlink_dl_dpipe_headers_nest; + parg.data = &dst->dpipe_headers; + if (devlink_dl_dpipe_headers_parse(&parg, attr)) + return MNL_CB_ERROR; } } return MNL_CB_OK; } -struct devlink_port_get_rsp * -devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req) +struct devlink_dpipe_headers_get_rsp * +devlink_dpipe_headers_get(struct ynl_sock *ys, + struct devlink_dpipe_headers_get_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = 
ys, }, }; - struct devlink_port_get_rsp *rsp; + struct devlink_dpipe_headers_get_rsp *rsp; struct nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_HEADERS_GET, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -640,33 +3862,119 @@ devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_port_get_rsp_parse; - yrs.rsp_cmd = 7; + yrs.cb = devlink_dpipe_headers_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_DPIPE_HEADERS_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_dpipe_headers_get_rsp_free(rsp); + return NULL; +} + +/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ +/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ +void +devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->dpipe_table_name); + free(req); +} + +int devlink_dpipe_table_counters_set(struct ynl_sock *ys, + struct devlink_dpipe_table_counters_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.dpipe_table_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); + if (req->_present.dpipe_table_counters_enabled) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ +/* DEVLINK_CMD_RESOURCE_SET - do */ +void devlink_resource_set_req_free(struct devlink_resource_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_resource_set(struct ynl_sock *ys, + struct devlink_resource_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.resource_id) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID, req->resource_id); + if (req->_present.resource_size) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size); - err = ynl_exec(ys, nlh, &yrs); + err = ynl_exec(ys, nlh, NULL); if (err < 0) - goto err_free; + return -1; - return rsp; + return 0; +} -err_free: - devlink_port_get_rsp_free(rsp); - return NULL; +/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ +/* DEVLINK_CMD_RESOURCE_DUMP - do */ +void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); } -/* 
DEVLINK_CMD_PORT_GET - dump */ -int devlink_port_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) +void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp) { - struct devlink_port_get_rsp_dump *dst; + free(rsp->bus_name); + free(rsp->dev_name); + devlink_dl_resource_list_free(&rsp->resource_list); + free(rsp); +} + +int devlink_resource_dump_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct devlink_resource_dump_rsp *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; + struct ynl_parse_arg parg; dst = yarg->data; + parg.ys = yarg->ys; mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { unsigned int type = mnl_attr_get_type(attr); @@ -693,84 +4001,75 @@ int devlink_port_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { + } else if (type == DEVLINK_ATTR_RESOURCE_LIST) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); + dst->_present.resource_list = 1; + + parg.rsp_policy = &devlink_dl_resource_list_nest; + parg.data = &dst->resource_list; + if (devlink_dl_resource_list_parse(&parg, attr)) + return MNL_CB_ERROR; } } return MNL_CB_OK; } -void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp) -{ - struct devlink_port_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_port_get_rsp_list * -devlink_port_get_dump(struct ynl_sock *ys, - struct devlink_port_get_req_dump *req) +struct devlink_resource_dump_rsp * +devlink_resource_dump(struct ynl_sock *ys, + struct devlink_resource_dump_req *req) { - struct ynl_dump_state yds = {}; + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_resource_dump_rsp *rsp; struct nlmsghdr *nlh; int err; - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_port_get_rsp_list); - yds.cb = devlink_port_get_rsp_dump_parse; - yds.rsp_cmd = 7; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_DUMP, 1); ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; if (req->_present.bus_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - err = ynl_exec_dump(ys, nlh, &yds); + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_resource_dump_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_RESOURCE_DUMP; + + err = ynl_exec(ys, nlh, &yrs); if (err < 0) - goto free_list; + goto err_free; - return yds.first; + return rsp; -free_list: - devlink_port_get_rsp_list_free(yds.first); +err_free: + devlink_resource_dump_rsp_free(rsp); return NULL; } -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -void devlink_sb_get_req_free(struct devlink_sb_get_req *req) +/* ============== DEVLINK_CMD_RELOAD ============== */ +/* DEVLINK_CMD_RELOAD - do */ +void devlink_reload_req_free(struct devlink_reload_req *req) { free(req->bus_name); free(req->dev_name); free(req); } -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp) +void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp) { 
free(rsp->bus_name); free(rsp->dev_name); free(rsp); } -int devlink_sb_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_reload_rsp_parse(const struct nlmsghdr *nlh, void *data) { struct ynl_parse_arg *yarg = data; - struct devlink_sb_get_rsp *dst; + struct devlink_reload_rsp *dst; const struct nlattr *attr; dst = yarg->data; @@ -800,26 +4099,26 @@ int devlink_sb_get_rsp_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { + } else if (type == DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); + dst->_present.reload_actions_performed = 1; + memcpy(&dst->reload_actions_performed, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32)); } } return MNL_CB_OK; } -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) +struct devlink_reload_rsp * +devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_get_rsp *rsp; + struct devlink_reload_rsp *rsp; struct nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RELOAD, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -827,13 +4126,21 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); + if (req->_present.reload_action) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_RELOAD_ACTION, req->reload_action); + if (req->_present.reload_limits) + mnl_attr_put(nlh, DEVLINK_ATTR_RELOAD_LIMITS, sizeof(struct nla_bitfield32), &req->reload_limits); + if (req->_present.netns_pid) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_PID, req->netns_pid); + if (req->_present.netns_fd) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_FD, req->netns_fd); + if (req->_present.netns_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_ID, req->netns_id); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = 13; + yrs.cb = devlink_reload_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_RELOAD; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -842,76 +4149,31 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) return rsp; err_free: - devlink_sb_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_GET - dump */ -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp) -{ - struct devlink_sb_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_get_list); - yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = 13; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) 
- mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_get_list_free(yds.first); + devlink_reload_rsp_free(rsp); return NULL; } -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req) +/* ============== DEVLINK_CMD_PARAM_GET ============== */ +/* DEVLINK_CMD_PARAM_GET - do */ +void devlink_param_get_req_free(struct devlink_param_get_req *req) { free(req->bus_name); free(req->dev_name); + free(req->param_name); free(req); } -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp) +void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); + free(rsp->param_name); free(rsp); } -int devlink_sb_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) { - struct devlink_sb_pool_get_rsp *dst; + struct devlink_param_get_rsp *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; @@ -942,31 +4204,32 @@ int devlink_sb_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { + } else if (type == DEVLINK_ATTR_PARAM_NAME) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.param_name_len = len; + dst->param_name = malloc(len + 1); + memcpy(dst->param_name, mnl_attr_get_str(attr), len); + dst->param_name[len] = 0; } } return MNL_CB_OK; } -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) +struct devlink_param_get_rsp * +devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_pool_get_rsp *rsp; + struct devlink_param_get_rsp *rsp; struct nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -974,15 +4237,13 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + if (req->_present.param_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = 17; + yrs.cb = devlink_param_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_PARAM_GET; err = 
ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -991,14 +4252,14 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) return rsp; err_free: - devlink_sb_pool_get_rsp_free(rsp); + devlink_param_get_rsp_free(rsp); return NULL; } -/* DEVLINK_CMD_SB_POOL_GET - dump */ -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp) +/* DEVLINK_CMD_PARAM_GET - dump */ +void devlink_param_get_list_free(struct devlink_param_get_list *rsp) { - struct devlink_sb_pool_get_list *next = rsp; + struct devlink_param_get_list *next = rsp; while ((void *)next != YNL_LIST_END) { rsp = next; @@ -1006,25 +4267,26 @@ void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp) free(rsp->obj.bus_name); free(rsp->obj.dev_name); + free(rsp->obj.param_name); free(rsp); } } -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req) +struct devlink_param_get_list * +devlink_param_get_dump(struct ynl_sock *ys, + struct devlink_param_get_req_dump *req) { struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); - yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = 17; + yds.alloc_sz = sizeof(struct devlink_param_get_list); + yds.cb = devlink_param_get_rsp_parse; + yds.rsp_cmd = DEVLINK_CMD_PARAM_GET; yds.rsp_policy = &devlink_nest; - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); ys->req_policy = &devlink_nest; if (req->_present.bus_name_len) @@ -1039,31 +4301,67 @@ devlink_sb_pool_get_dump(struct ynl_sock *ys, return yds.first; free_list: - devlink_sb_pool_get_list_free(yds.first); + devlink_param_get_list_free(yds.first); return NULL; } -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req) +/* ============== DEVLINK_CMD_PARAM_SET ============== */ +/* DEVLINK_CMD_PARAM_SET - do */ +void devlink_param_set_req_free(struct devlink_param_set_req *req) { free(req->bus_name); free(req->dev_name); + free(req->param_name); free(req); } -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp) +int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.param_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); + if (req->_present.param_type) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, req->param_type); + if (req->_present.param_value_cmode) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_REGION_GET ============== */ +/* DEVLINK_CMD_REGION_GET - do */ +void devlink_region_get_req_free(struct devlink_region_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->region_name); + free(req); +} + +void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); + 
free(rsp->region_name); free(rsp); } -int devlink_sb_port_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_region_get_rsp_parse(const struct nlmsghdr *nlh, void *data) { - struct devlink_sb_port_pool_get_rsp *dst; + struct devlink_region_get_rsp *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; @@ -1099,32 +4397,32 @@ int devlink_sb_port_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.port_index = 1; dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { + } else if (type == DEVLINK_ATTR_REGION_NAME) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.region_name_len = len; + dst->region_name = malloc(len + 1); + memcpy(dst->region_name, mnl_attr_get_str(attr), len); + dst->region_name[len] = 0; } } return MNL_CB_OK; } -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req) +struct devlink_region_get_rsp * +devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_port_pool_get_rsp *rsp; + struct devlink_region_get_rsp *rsp; struct nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -1134,15 +4432,13 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); + if (req->_present.region_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = 21; + yrs.cb = devlink_region_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_REGION_GET; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1151,15 +4447,14 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, return rsp; err_free: - devlink_sb_port_pool_get_rsp_free(rsp); + devlink_region_get_rsp_free(rsp); return NULL; } -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp) +/* DEVLINK_CMD_REGION_GET - dump */ +void devlink_region_get_list_free(struct devlink_region_get_list *rsp) { - struct devlink_sb_port_pool_get_list *next = rsp; + struct devlink_region_get_list *next = rsp; while ((void *)next != YNL_LIST_END) { rsp = next; @@ -1167,25 +4462,26 @@ devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp) free(rsp->obj.bus_name); free(rsp->obj.dev_name); + free(rsp->obj.region_name); free(rsp); } } -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump 
*req) +struct devlink_region_get_list * +devlink_region_get_dump(struct ynl_sock *ys, + struct devlink_region_get_req_dump *req) { struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); - yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = 21; + yds.alloc_sz = sizeof(struct devlink_region_get_list); + yds.cb = devlink_region_get_rsp_parse; + yds.rsp_cmd = DEVLINK_CMD_REGION_GET; yds.rsp_policy = &devlink_nest; - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); ys->req_policy = &devlink_nest; if (req->_present.bus_name_len) @@ -1200,32 +4496,31 @@ devlink_sb_port_pool_get_dump(struct ynl_sock *ys, return yds.first; free_list: - devlink_sb_port_pool_get_list_free(yds.first); + devlink_region_get_list_free(yds.first); return NULL; } -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req) +/* ============== DEVLINK_CMD_REGION_NEW ============== */ +/* DEVLINK_CMD_REGION_NEW - do */ +void devlink_region_new_req_free(struct devlink_region_new_req *req) { free(req->bus_name); free(req->dev_name); + free(req->region_name); free(req); } -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp) +void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); + free(rsp->region_name); free(rsp); } -int devlink_sb_tc_pool_bind_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) +int devlink_region_new_rsp_parse(const struct nlmsghdr *nlh, void *data) { - struct devlink_sb_tc_pool_bind_get_rsp *dst; + struct devlink_region_new_rsp *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; @@ -1261,37 +4556,37 @@ int devlink_sb_tc_pool_bind_get_rsp_parse(const struct nlmsghdr *nlh, return MNL_CB_ERROR; dst->_present.port_index = 1; dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_TYPE) { + } else if (type == DEVLINK_ATTR_REGION_NAME) { + unsigned int len; + if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.sb_pool_type = 1; - dst->sb_pool_type = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_SB_TC_INDEX) { + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.region_name_len = len; + dst->region_name = malloc(len + 1); + memcpy(dst->region_name, mnl_attr_get_str(attr), len); + dst->region_name[len] = 0; + } else if (type == DEVLINK_ATTR_REGION_SNAPSHOT_ID) { if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; - dst->_present.sb_tc_index = 1; - dst->sb_tc_index = mnl_attr_get_u16(attr); + dst->_present.region_snapshot_id = 1; + dst->region_snapshot_id = mnl_attr_get_u32(attr); } } return MNL_CB_OK; } -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req) +struct devlink_region_new_rsp * +devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_tc_pool_bind_get_rsp *rsp; + struct devlink_region_new_rsp *rsp; struct 
nlmsghdr *nlh; int err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_NEW, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -1301,17 +4596,15 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); - if (req->_present.sb_tc_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); + if (req->_present.region_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); + if (req->_present.region_snapshot_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = 25; + yrs.cb = devlink_region_new_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_REGION_NEW; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1320,80 +4613,51 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, return rsp; err_free: - devlink_sb_tc_pool_bind_get_rsp_free(rsp); + devlink_region_new_rsp_free(rsp); return NULL; } -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp) +/* ============== DEVLINK_CMD_REGION_DEL ============== */ +/* DEVLINK_CMD_REGION_DEL - do */ +void devlink_region_del_req_free(struct devlink_region_del_req *req) { - struct devlink_sb_tc_pool_bind_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } + free(req->bus_name); + free(req->dev_name); + free(req->region_name); + free(req); } -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req) +int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) { - struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); - yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = 25; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_DEL, 1); ys->req_policy = &devlink_nest; if (req->_present.bus_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.region_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); + if (req->_present.region_snapshot_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - err = ynl_exec_dump(ys, nlh, &yds); + err = ynl_exec(ys, nlh, NULL); if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_tc_pool_bind_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -void 
devlink_param_get_req_free(struct devlink_param_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->param_name); - free(req); -} + return -1; -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->param_name); - free(rsp); + return 0; } -int devlink_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +/* ============== DEVLINK_CMD_REGION_READ ============== */ +/* DEVLINK_CMD_REGION_READ - dump */ +int devlink_region_read_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) { - struct devlink_param_get_rsp *dst; + struct devlink_region_read_rsp_dump *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; @@ -1424,62 +4688,32 @@ int devlink_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) dst->dev_name = malloc(len + 1); memcpy(dst->dev_name, mnl_attr_get_str(attr), len); dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PARAM_NAME) { + } else if (type == DEVLINK_ATTR_PORT_INDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port_index = 1; + dst->port_index = mnl_attr_get_u32(attr); + } else if (type == DEVLINK_ATTR_REGION_NAME) { unsigned int len; if (ynl_attr_validate(yarg, attr)) return MNL_CB_ERROR; len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.param_name_len = len; - dst->param_name = malloc(len + 1); - memcpy(dst->param_name, mnl_attr_get_str(attr), len); - dst->param_name[len] = 0; + dst->_present.region_name_len = len; + dst->region_name = malloc(len + 1); + memcpy(dst->region_name, mnl_attr_get_str(attr), len); + dst->region_name[len] = 0; } } return MNL_CB_OK; } -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_param_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.param_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_param_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PARAM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_param_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PARAM_GET - dump */ -void devlink_param_get_list_free(struct devlink_param_get_list *rsp) +void +devlink_region_read_rsp_list_free(struct devlink_region_read_rsp_list *rsp) { - struct devlink_param_get_list *next = rsp; + struct devlink_region_read_rsp_list *next = rsp; while ((void *)next != YNL_LIST_END) { rsp = next; @@ -1487,32 +4721,44 @@ void devlink_param_get_list_free(struct devlink_param_get_list *rsp) free(rsp->obj.bus_name); free(rsp->obj.dev_name); - free(rsp->obj.param_name); + free(rsp->obj.region_name); free(rsp); } } -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req) +struct devlink_region_read_rsp_list * +devlink_region_read_dump(struct ynl_sock *ys, + struct devlink_region_read_req_dump *req) { struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int 
err; yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_param_get_list); - yds.cb = devlink_param_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_PARAM_GET; + yds.alloc_sz = sizeof(struct devlink_region_read_rsp_list); + yds.cb = devlink_region_read_rsp_dump_parse; + yds.rsp_cmd = DEVLINK_CMD_REGION_READ; yds.rsp_policy = &devlink_nest; - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_READ, 1); ys->req_policy = &devlink_nest; if (req->_present.bus_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.region_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); + if (req->_present.region_snapshot_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); + if (req->_present.region_direct) + mnl_attr_put(nlh, DEVLINK_ATTR_REGION_DIRECT, 0, NULL); + if (req->_present.region_chunk_addr) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_ADDR, req->region_chunk_addr); + if (req->_present.region_chunk_len) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_LEN, req->region_chunk_len); err = ynl_exec_dump(ys, nlh, &yds); if (err < 0) @@ -1521,31 +4767,29 @@ devlink_param_get_dump(struct ynl_sock *ys, return yds.first; free_list: - devlink_param_get_list_free(yds.first); + devlink_region_read_rsp_list_free(yds.first); return NULL; } -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -void devlink_region_get_req_free(struct devlink_region_get_req *req) +/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ +/* DEVLINK_CMD_PORT_PARAM_GET - do */ +void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req) { free(req->bus_name); free(req->dev_name); - free(req->region_name); free(req); } -void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp) +void devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp) { free(rsp->bus_name); free(rsp->dev_name); - free(rsp->region_name); free(rsp); } -int devlink_region_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +int devlink_port_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) { - struct devlink_region_get_rsp *dst; + struct devlink_port_param_get_rsp *dst; struct ynl_parse_arg *yarg = data; const struct nlattr *attr; @@ -1581,32 +4825,22 @@ int devlink_region_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.port_index = 1; dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; } } return MNL_CB_OK; } -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req) +struct devlink_port_param_get_rsp * +devlink_port_param_get(struct ynl_sock *ys, + struct devlink_port_param_get_req *req) { struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_region_get_rsp *rsp; + struct devlink_port_param_get_rsp *rsp; struct nlmsghdr *nlh; int 
err; - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); ys->req_policy = &devlink_nest; yrs.yarg.rsp_policy = &devlink_nest; @@ -1616,13 +4850,11 @@ devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; - yrs.cb = devlink_region_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_REGION_GET; + yrs.cb = devlink_port_param_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1631,14 +4863,14 @@ devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req) return rsp; err_free: - devlink_region_get_rsp_free(rsp); + devlink_port_param_get_rsp_free(rsp); return NULL; } -/* DEVLINK_CMD_REGION_GET - dump */ -void devlink_region_get_list_free(struct devlink_region_get_list *rsp) +/* DEVLINK_CMD_PORT_PARAM_GET - dump */ +void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp) { - struct devlink_region_get_list *next = rsp; + struct devlink_port_param_get_list *next = rsp; while ((void *)next != YNL_LIST_END) { rsp = next; @@ -1646,32 +4878,24 @@ void devlink_region_get_list_free(struct devlink_region_get_list *rsp) free(rsp->obj.bus_name); free(rsp->obj.dev_name); - free(rsp->obj.region_name); free(rsp); } } -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req) +struct devlink_port_param_get_list * +devlink_port_param_get_dump(struct ynl_sock *ys) { struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_region_get_list); - yds.cb = devlink_region_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_REGION_GET; + yds.alloc_sz = sizeof(struct devlink_port_param_get_list); + yds.cb = devlink_port_param_get_rsp_parse; + yds.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; yds.rsp_policy = &devlink_nest; - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); err = ynl_exec_dump(ys, nlh, &yds); if (err < 0) @@ -1680,10 +4904,42 @@ devlink_region_get_dump(struct ynl_sock *ys, return yds.first; free_list: - devlink_region_get_list_free(yds.first); + devlink_port_param_get_list_free(yds.first); return NULL; } +/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */ +/* DEVLINK_CMD_PORT_PARAM_SET - do */ +void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_port_param_set(struct ynl_sock *ys, + struct devlink_port_param_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, 
DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_INFO_GET ============== */ /* DEVLINK_CMD_INFO_GET - do */ void devlink_info_get_req_free(struct devlink_info_get_req *req) @@ -2046,46 +5302,316 @@ devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *r rsp = next; next = rsp->next; - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.health_reporter_name); - free(rsp); - } + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + free(rsp->obj.health_reporter_name); + free(rsp); + } +} + +struct devlink_health_reporter_get_list * +devlink_health_reporter_get_dump(struct ynl_sock *ys, + struct devlink_health_reporter_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_health_reporter_get_list); + yds.cb = devlink_health_reporter_get_rsp_parse; + yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_health_reporter_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ +void +devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->health_reporter_name); + free(req); +} + +int devlink_health_reporter_set(struct ynl_sock *ys, + struct devlink_health_reporter_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + if (req->_present.health_reporter_graceful_period) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, req->health_reporter_graceful_period); + if (req->_present.health_reporter_auto_recover) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, req->health_reporter_auto_recover); + if (req->_present.health_reporter_auto_dump) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ +void +devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req) +{ + free(req->bus_name); + 
free(req->dev_name); + free(req->health_reporter_name); + free(req); +} + +int devlink_health_reporter_recover(struct ynl_sock *ys, + struct devlink_health_reporter_recover_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ +void +devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->health_reporter_name); + free(req); +} + +int devlink_health_reporter_diagnose(struct ynl_sock *ys, + struct devlink_health_reporter_diagnose_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ +int devlink_health_reporter_dump_get_rsp_dump_parse(const struct nlmsghdr *nlh, + void *data) +{ + struct devlink_health_reporter_dump_get_rsp_dump *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_FMSG) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.fmsg = 1; + + parg.rsp_policy = &devlink_dl_fmsg_nest; + parg.data = &dst->fmsg; + if (devlink_dl_fmsg_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return MNL_CB_OK; +} + +void +devlink_health_reporter_dump_get_rsp_list_free(struct devlink_health_reporter_dump_get_rsp_list *rsp) +{ + struct devlink_health_reporter_dump_get_rsp_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + devlink_dl_fmsg_free(&rsp->obj.fmsg); + free(rsp); + } +} + +struct devlink_health_reporter_dump_get_rsp_list * +devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, + struct devlink_health_reporter_dump_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_health_reporter_dump_get_rsp_list); + yds.cb = devlink_health_reporter_dump_get_rsp_dump_parse; + yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET; 
+ yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_health_reporter_dump_get_rsp_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ +void +devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->health_reporter_name); + free(req); +} + +int devlink_health_reporter_dump_clear(struct ynl_sock *ys, + struct devlink_health_reporter_dump_clear_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ +/* DEVLINK_CMD_FLASH_UPDATE - do */ +void devlink_flash_update_req_free(struct devlink_flash_update_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->flash_update_file_name); + free(req->flash_update_component); + free(req); } -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req) +int devlink_flash_update(struct ynl_sock *ys, + struct devlink_flash_update_req *req) { - struct ynl_dump_state yds = {}; struct nlmsghdr *nlh; int err; - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_health_reporter_get_list); - yds.cb = devlink_health_reporter_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_FLASH_UPDATE, 1); ys->req_policy = &devlink_nest; if (req->_present.bus_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); if (req->_present.dev_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec_dump(ys, nlh, &yds); + if (req->_present.flash_update_file_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, req->flash_update_file_name); + if (req->_present.flash_update_component_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, req->flash_update_component); + if 
(req->_present.flash_update_overwrite_mask) + mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask); + + err = ynl_exec(ys, nlh, NULL); if (err < 0) - goto free_list; - - return yds.first; + return -1; -free_list: - devlink_health_reporter_get_list_free(yds.first); - return NULL; + return 0; } /* ============== DEVLINK_CMD_TRAP_GET ============== */ @@ -2240,6 +5766,40 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_TRAP_SET ============== */ +/* DEVLINK_CMD_TRAP_SET - do */ +void devlink_trap_set_req_free(struct devlink_trap_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->trap_name); + free(req); +} + +int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.trap_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, req->trap_name); + if (req->_present.trap_action) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ /* DEVLINK_CMD_TRAP_GROUP_GET - do */ void devlink_trap_group_get_req_free(struct devlink_trap_group_get_req *req) @@ -2393,6 +5953,43 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ +/* DEVLINK_CMD_TRAP_GROUP_SET - do */ +void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->trap_group_name); + free(req); +} + +int devlink_trap_group_set(struct ynl_sock *ys, + struct devlink_trap_group_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.trap_group_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, req->trap_group_name); + if (req->_present.trap_action) + mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); + if (req->_present.trap_policer_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ /* DEVLINK_CMD_TRAP_POLICER_GET - do */ void @@ -2540,6 +6137,79 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ +/* DEVLINK_CMD_TRAP_POLICER_SET - do */ +void +devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +int devlink_trap_policer_set(struct ynl_sock *ys, + struct devlink_trap_policer_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, 
req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.trap_policer_id) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); + if (req->_present.trap_policer_rate) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_RATE, req->trap_policer_rate); + if (req->_present.trap_policer_burst) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ +void +devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->health_reporter_name); + free(req); +} + +int devlink_health_reporter_test(struct ynl_sock *ys, + struct devlink_health_reporter_test_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_TEST, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.port_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); + if (req->_present.health_reporter_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_RATE_GET ============== */ /* DEVLINK_CMD_RATE_GET - do */ void devlink_rate_get_req_free(struct devlink_rate_get_req *req) @@ -2699,6 +6369,124 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_RATE_SET ============== */ +/* DEVLINK_CMD_RATE_SET - do */ +void devlink_rate_set_req_free(struct devlink_rate_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->rate_node_name); + free(req->rate_parent_node_name); + free(req); +} + +int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.rate_node_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); + if (req->_present.rate_tx_share) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); + if (req->_present.rate_tx_max) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); + if (req->_present.rate_tx_priority) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); + if (req->_present.rate_tx_weight) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); + if (req->_present.rate_parent_node_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_RATE_NEW ============== */ +/* DEVLINK_CMD_RATE_NEW - do */ +void devlink_rate_new_req_free(struct devlink_rate_new_req *req) +{ + free(req->bus_name); + free(req->dev_name); + 
free(req->rate_node_name); + free(req->rate_parent_node_name); + free(req); +} + +int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_NEW, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.rate_node_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); + if (req->_present.rate_tx_share) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); + if (req->_present.rate_tx_max) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); + if (req->_present.rate_tx_priority) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); + if (req->_present.rate_tx_weight) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); + if (req->_present.rate_parent_node_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== DEVLINK_CMD_RATE_DEL ============== */ +/* DEVLINK_CMD_RATE_DEL - do */ +void devlink_rate_del_req_free(struct devlink_rate_del_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->rate_node_name); + free(req); +} + +int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_DEL, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.rate_node_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_LINECARD_GET ============== */ /* DEVLINK_CMD_LINECARD_GET - do */ void devlink_linecard_get_req_free(struct devlink_linecard_get_req *req) @@ -2842,6 +6630,41 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_LINECARD_SET ============== */ +/* DEVLINK_CMD_LINECARD_SET - do */ +void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req->linecard_type); + free(req); +} + +int devlink_linecard_set(struct ynl_sock *ys, + struct devlink_linecard_set_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_LINECARD_SET, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.linecard_index) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_LINECARD_INDEX, req->linecard_index); + if (req->_present.linecard_type_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + /* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ /* DEVLINK_CMD_SELFTESTS_GET - do */ void devlink_selftests_get_req_free(struct devlink_selftests_get_req *req) @@ 
-2972,6 +6795,39 @@ free_list: return NULL; } +/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ +/* DEVLINK_CMD_SELFTESTS_RUN - do */ +void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req) +{ + free(req->bus_name); + free(req->dev_name); + devlink_dl_selftest_id_free(&req->selftests); + free(req); +} + +int devlink_selftests_run(struct ynl_sock *ys, + struct devlink_selftests_run_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_RUN, 1); + ys->req_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + if (req->_present.selftests) + devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + const struct ynl_family ynl_devlink_family = { .name = "devlink", }; diff --git a/tools/net/ynl/generated/devlink-user.h b/tools/net/ynl/generated/devlink-user.h index d00bcf79fa0d..9f45cc0d854c 100644 --- a/tools/net/ynl/generated/devlink-user.h +++ b/tools/net/ynl/generated/devlink-user.h @@ -9,6 +9,7 @@ #include #include #include +#include #include struct ynl_sock; @@ -18,8 +19,130 @@ extern const struct ynl_family ynl_devlink_family; /* Enums */ const char *devlink_op_str(int op); const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value); +const char *devlink_port_type_str(enum devlink_port_type value); +const char *devlink_port_flavour_str(enum devlink_port_flavour value); +const char *devlink_port_fn_state_str(enum devlink_port_fn_state value); +const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value); +const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value); +const char * +devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value); +const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value); +const char * +devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value); +const char * +devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value); +const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value); +const char * +devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value); +const char * +devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value); +const char *devlink_resource_unit_str(enum devlink_resource_unit value); +const char *devlink_reload_action_str(enum devlink_reload_action value); +const char *devlink_param_cmode_str(enum devlink_param_cmode value); +const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value); +const char *devlink_trap_action_str(enum devlink_trap_action value); /* Common nested types */ +struct devlink_dl_dpipe_match { + struct { + __u32 dpipe_match_type:1; + __u32 dpipe_header_id:1; + __u32 dpipe_header_global:1; + __u32 dpipe_header_index:1; + __u32 dpipe_field_id:1; + } _present; + + enum devlink_dpipe_match_type dpipe_match_type; + __u32 dpipe_header_id; + __u8 dpipe_header_global; + __u32 dpipe_header_index; + __u32 dpipe_field_id; +}; + +struct devlink_dl_dpipe_match_value { + struct { + __u32 dpipe_value_len; + __u32 dpipe_value_mask_len; + __u32 dpipe_value_mapping:1; + } _present; + + unsigned int n_dpipe_match; + struct devlink_dl_dpipe_match *dpipe_match; + void *dpipe_value; + void *dpipe_value_mask; + __u32 dpipe_value_mapping; +}; + 
+struct devlink_dl_dpipe_action { + struct { + __u32 dpipe_action_type:1; + __u32 dpipe_header_id:1; + __u32 dpipe_header_global:1; + __u32 dpipe_header_index:1; + __u32 dpipe_field_id:1; + } _present; + + enum devlink_dpipe_action_type dpipe_action_type; + __u32 dpipe_header_id; + __u8 dpipe_header_global; + __u32 dpipe_header_index; + __u32 dpipe_field_id; +}; + +struct devlink_dl_dpipe_action_value { + struct { + __u32 dpipe_value_len; + __u32 dpipe_value_mask_len; + __u32 dpipe_value_mapping:1; + } _present; + + unsigned int n_dpipe_action; + struct devlink_dl_dpipe_action *dpipe_action; + void *dpipe_value; + void *dpipe_value_mask; + __u32 dpipe_value_mapping; +}; + +struct devlink_dl_dpipe_field { + struct { + __u32 dpipe_field_name_len; + __u32 dpipe_field_id:1; + __u32 dpipe_field_bitwidth:1; + __u32 dpipe_field_mapping_type:1; + } _present; + + char *dpipe_field_name; + __u32 dpipe_field_id; + __u32 dpipe_field_bitwidth; + enum devlink_dpipe_field_mapping_type dpipe_field_mapping_type; +}; + +struct devlink_dl_resource { + struct { + __u32 resource_name_len; + __u32 resource_id:1; + __u32 resource_size:1; + __u32 resource_size_new:1; + __u32 resource_size_valid:1; + __u32 resource_size_min:1; + __u32 resource_size_max:1; + __u32 resource_size_gran:1; + __u32 resource_unit:1; + __u32 resource_occ:1; + } _present; + + char *resource_name; + __u64 resource_id; + __u64 resource_size; + __u64 resource_size_new; + __u8 resource_size_valid; + __u64 resource_size_min; + __u64 resource_size_max; + __u64 resource_size_gran; + enum devlink_resource_unit resource_unit; + __u64 resource_occ; +}; + struct devlink_dl_info_version { struct { __u32 info_version_name_len; @@ -30,6 +153,32 @@ struct devlink_dl_info_version { char *info_version_value; }; +struct devlink_dl_fmsg { + struct { + __u32 fmsg_obj_nest_start:1; + __u32 fmsg_pair_nest_start:1; + __u32 fmsg_arr_nest_start:1; + __u32 fmsg_nest_end:1; + __u32 fmsg_obj_name_len; + } _present; + + char *fmsg_obj_name; +}; + +struct devlink_dl_port_function { + struct { + __u32 hw_addr_len; + __u32 state:1; + __u32 opstate:1; + __u32 caps:1; + } _present; + + void *hw_addr; + enum devlink_port_fn_state state; + enum devlink_port_fn_opstate opstate; + struct nla_bitfield32 caps; +}; + struct devlink_dl_reload_stats_entry { struct { __u32 reload_stats_limit:1; @@ -45,21 +194,120 @@ struct devlink_dl_reload_act_stats { struct devlink_dl_reload_stats_entry *reload_stats_entry; }; +struct devlink_dl_selftest_id { + struct { + __u32 flash:1; + } _present; +}; + +struct devlink_dl_dpipe_table_matches { + unsigned int n_dpipe_match; + struct devlink_dl_dpipe_match *dpipe_match; +}; + +struct devlink_dl_dpipe_table_actions { + unsigned int n_dpipe_action; + struct devlink_dl_dpipe_action *dpipe_action; +}; + +struct devlink_dl_dpipe_entry_match_values { + unsigned int n_dpipe_match_value; + struct devlink_dl_dpipe_match_value *dpipe_match_value; +}; + +struct devlink_dl_dpipe_entry_action_values { + unsigned int n_dpipe_action_value; + struct devlink_dl_dpipe_action_value *dpipe_action_value; +}; + +struct devlink_dl_dpipe_header_fields { + unsigned int n_dpipe_field; + struct devlink_dl_dpipe_field *dpipe_field; +}; + +struct devlink_dl_resource_list { + unsigned int n_resource; + struct devlink_dl_resource *resource; +}; + struct devlink_dl_reload_act_info { struct { __u32 reload_action:1; } _present; - __u8 reload_action; + enum devlink_reload_action reload_action; unsigned int n_reload_action_stats; struct devlink_dl_reload_act_stats 
*reload_action_stats; }; +struct devlink_dl_dpipe_table { + struct { + __u32 dpipe_table_name_len; + __u32 dpipe_table_size:1; + __u32 dpipe_table_matches:1; + __u32 dpipe_table_actions:1; + __u32 dpipe_table_counters_enabled:1; + __u32 dpipe_table_resource_id:1; + __u32 dpipe_table_resource_units:1; + } _present; + + char *dpipe_table_name; + __u64 dpipe_table_size; + struct devlink_dl_dpipe_table_matches dpipe_table_matches; + struct devlink_dl_dpipe_table_actions dpipe_table_actions; + __u8 dpipe_table_counters_enabled; + __u64 dpipe_table_resource_id; + __u64 dpipe_table_resource_units; +}; + +struct devlink_dl_dpipe_entry { + struct { + __u32 dpipe_entry_index:1; + __u32 dpipe_entry_match_values:1; + __u32 dpipe_entry_action_values:1; + __u32 dpipe_entry_counter:1; + } _present; + + __u64 dpipe_entry_index; + struct devlink_dl_dpipe_entry_match_values dpipe_entry_match_values; + struct devlink_dl_dpipe_entry_action_values dpipe_entry_action_values; + __u64 dpipe_entry_counter; +}; + +struct devlink_dl_dpipe_header { + struct { + __u32 dpipe_header_name_len; + __u32 dpipe_header_id:1; + __u32 dpipe_header_global:1; + __u32 dpipe_header_fields:1; + } _present; + + char *dpipe_header_name; + __u32 dpipe_header_id; + __u8 dpipe_header_global; + struct devlink_dl_dpipe_header_fields dpipe_header_fields; +}; + struct devlink_dl_reload_stats { unsigned int n_reload_action_info; struct devlink_dl_reload_act_info *reload_action_info; }; +struct devlink_dl_dpipe_tables { + unsigned int n_dpipe_table; + struct devlink_dl_dpipe_table *dpipe_table; +}; + +struct devlink_dl_dpipe_entries { + unsigned int n_dpipe_entry; + struct devlink_dl_dpipe_entry *dpipe_entry; +}; + +struct devlink_dl_dpipe_headers { + unsigned int n_dpipe_header; + struct devlink_dl_dpipe_header *dpipe_header; +}; + struct devlink_dl_dev_stats { struct { __u32 reload_stats:1; @@ -269,29 +517,33 @@ struct devlink_port_get_rsp_list * devlink_port_get_dump(struct ynl_sock *ys, struct devlink_port_get_req_dump *req); -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -struct devlink_sb_get_req { +/* ============== DEVLINK_CMD_PORT_SET ============== */ +/* DEVLINK_CMD_PORT_SET - do */ +struct devlink_port_set_req { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 sb_index:1; + __u32 port_index:1; + __u32 port_type:1; + __u32 port_function:1; } _present; char *bus_name; char *dev_name; - __u32 sb_index; + __u32 port_index; + enum devlink_port_type port_type; + struct devlink_dl_port_function port_function; }; -static inline struct devlink_sb_get_req *devlink_sb_get_req_alloc(void) +static inline struct devlink_port_set_req *devlink_port_set_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_get_req)); + return calloc(1, sizeof(struct devlink_port_set_req)); } -void devlink_sb_get_req_free(struct devlink_sb_get_req *req); +void devlink_port_set_req_free(struct devlink_port_set_req *req); static inline void -devlink_sb_get_req_set_bus_name(struct devlink_sb_get_req *req, - const char *bus_name) +devlink_port_set_req_set_bus_name(struct devlink_port_set_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -300,8 +552,8 @@ devlink_sb_get_req_set_bus_name(struct devlink_sb_get_req *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_get_req_set_dev_name(struct devlink_sb_get_req *req, - const char *dev_name) +devlink_port_set_req_set_dev_name(struct devlink_port_set_req *req, + const char 
*dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -310,53 +562,89 @@ devlink_sb_get_req_set_dev_name(struct devlink_sb_get_req *req, req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_sb_get_req_set_sb_index(struct devlink_sb_get_req *req, __u32 sb_index) +devlink_port_set_req_set_port_index(struct devlink_port_set_req *req, + __u32 port_index) { - req->_present.sb_index = 1; - req->sb_index = sb_index; + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_port_set_req_set_port_type(struct devlink_port_set_req *req, + enum devlink_port_type port_type) +{ + req->_present.port_type = 1; + req->port_type = port_type; +} +static inline void +devlink_port_set_req_set_port_function_hw_addr(struct devlink_port_set_req *req, + const void *hw_addr, size_t len) +{ + free(req->port_function.hw_addr); + req->port_function._present.hw_addr_len = len; + req->port_function.hw_addr = malloc(req->port_function._present.hw_addr_len); + memcpy(req->port_function.hw_addr, hw_addr, req->port_function._present.hw_addr_len); +} +static inline void +devlink_port_set_req_set_port_function_state(struct devlink_port_set_req *req, + enum devlink_port_fn_state state) +{ + req->_present.port_function = 1; + req->port_function._present.state = 1; + req->port_function.state = state; +} +static inline void +devlink_port_set_req_set_port_function_opstate(struct devlink_port_set_req *req, + enum devlink_port_fn_opstate opstate) +{ + req->_present.port_function = 1; + req->port_function._present.opstate = 1; + req->port_function.opstate = opstate; +} +static inline void +devlink_port_set_req_set_port_function_caps(struct devlink_port_set_req *req, + struct nla_bitfield32 *caps) +{ + req->_present.port_function = 1; + req->port_function._present.caps = 1; + memcpy(&req->port_function.caps, caps, sizeof(struct nla_bitfield32)); } - -struct devlink_sb_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp); /* - * Get shared buffer instances. + * Set devlink port instances. 
*/ -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req); +int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req); -/* DEVLINK_CMD_SB_GET - dump */ -struct devlink_sb_get_req_dump { +/* ============== DEVLINK_CMD_PORT_NEW ============== */ +/* DEVLINK_CMD_PORT_NEW - do */ +struct devlink_port_new_req { struct { __u32 bus_name_len; __u32 dev_name_len; + __u32 port_index:1; + __u32 port_flavour:1; + __u32 port_pci_pf_number:1; + __u32 port_pci_sf_number:1; + __u32 port_controller_number:1; } _present; char *bus_name; char *dev_name; + __u32 port_index; + enum devlink_port_flavour port_flavour; + __u16 port_pci_pf_number; + __u32 port_pci_sf_number; + __u32 port_controller_number; }; -static inline struct devlink_sb_get_req_dump * -devlink_sb_get_req_dump_alloc(void) +static inline struct devlink_port_new_req *devlink_port_new_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_get_req_dump)); + return calloc(1, sizeof(struct devlink_port_new_req)); } -void devlink_sb_get_req_dump_free(struct devlink_sb_get_req_dump *req); +void devlink_port_new_req_free(struct devlink_port_new_req *req); static inline void -devlink_sb_get_req_dump_set_bus_name(struct devlink_sb_get_req_dump *req, - const char *bus_name) +devlink_port_new_req_set_bus_name(struct devlink_port_new_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -365,8 +653,8 @@ devlink_sb_get_req_dump_set_bus_name(struct devlink_sb_get_req_dump *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_get_req_dump_set_dev_name(struct devlink_sb_get_req_dump *req, - const char *dev_name) +devlink_port_new_req_set_dev_name(struct devlink_port_new_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -374,119 +662,85 @@ devlink_sb_get_req_dump_set_dev_name(struct devlink_sb_get_req_dump *req, memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } - -struct devlink_sb_get_list { - struct devlink_sb_get_list *next; - struct devlink_sb_get_rsp obj __attribute__ ((aligned (8))); -}; - -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp); - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -struct devlink_sb_pool_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; -}; - -static inline struct devlink_sb_pool_get_req * -devlink_sb_pool_get_req_alloc(void) +static inline void +devlink_port_new_req_set_port_index(struct devlink_port_new_req *req, + __u32 port_index) { - return calloc(1, sizeof(struct devlink_sb_pool_get_req)); + req->_present.port_index = 1; + req->port_index = port_index; } -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req); - static inline void -devlink_sb_pool_get_req_set_bus_name(struct devlink_sb_pool_get_req *req, - const char *bus_name) +devlink_port_new_req_set_port_flavour(struct devlink_port_new_req *req, + enum devlink_port_flavour port_flavour) { - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, 
req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; + req->_present.port_flavour = 1; + req->port_flavour = port_flavour; } static inline void -devlink_sb_pool_get_req_set_dev_name(struct devlink_sb_pool_get_req *req, - const char *dev_name) +devlink_port_new_req_set_port_pci_pf_number(struct devlink_port_new_req *req, + __u16 port_pci_pf_number) { - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; + req->_present.port_pci_pf_number = 1; + req->port_pci_pf_number = port_pci_pf_number; } static inline void -devlink_sb_pool_get_req_set_sb_index(struct devlink_sb_pool_get_req *req, - __u32 sb_index) +devlink_port_new_req_set_port_pci_sf_number(struct devlink_port_new_req *req, + __u32 port_pci_sf_number) { - req->_present.sb_index = 1; - req->sb_index = sb_index; + req->_present.port_pci_sf_number = 1; + req->port_pci_sf_number = port_pci_sf_number; } static inline void -devlink_sb_pool_get_req_set_sb_pool_index(struct devlink_sb_pool_get_req *req, - __u16 sb_pool_index) +devlink_port_new_req_set_port_controller_number(struct devlink_port_new_req *req, + __u32 port_controller_number) { - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; + req->_present.port_controller_number = 1; + req->port_controller_number = port_controller_number; } -struct devlink_sb_pool_get_rsp { +struct devlink_port_new_rsp { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; + __u32 port_index:1; } _present; char *bus_name; char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; + __u32 port_index; }; -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp); +void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp); /* - * Get shared buffer pool instances. + * Create devlink port instances. 
*/ -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req); +struct devlink_port_new_rsp * +devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req); -/* DEVLINK_CMD_SB_POOL_GET - dump */ -struct devlink_sb_pool_get_req_dump { +/* ============== DEVLINK_CMD_PORT_DEL ============== */ +/* DEVLINK_CMD_PORT_DEL - do */ +struct devlink_port_del_req { struct { __u32 bus_name_len; __u32 dev_name_len; + __u32 port_index:1; } _present; char *bus_name; char *dev_name; + __u32 port_index; }; -static inline struct devlink_sb_pool_get_req_dump * -devlink_sb_pool_get_req_dump_alloc(void) +static inline struct devlink_port_del_req *devlink_port_del_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_pool_get_req_dump)); + return calloc(1, sizeof(struct devlink_port_del_req)); } -void -devlink_sb_pool_get_req_dump_free(struct devlink_sb_pool_get_req_dump *req); +void devlink_port_del_req_free(struct devlink_port_del_req *req); static inline void -devlink_sb_pool_get_req_dump_set_bus_name(struct devlink_sb_pool_get_req_dump *req, - const char *bus_name) +devlink_port_del_req_set_bus_name(struct devlink_port_del_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -495,8 +749,8 @@ devlink_sb_pool_get_req_dump_set_bus_name(struct devlink_sb_pool_get_req_dump *r req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_pool_get_req_dump_set_dev_name(struct devlink_sb_pool_get_req_dump *req, - const char *dev_name) +devlink_port_del_req_set_dev_name(struct devlink_port_del_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -504,47 +758,44 @@ devlink_sb_pool_get_req_dump_set_dev_name(struct devlink_sb_pool_get_req_dump *r memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } +static inline void +devlink_port_del_req_set_port_index(struct devlink_port_del_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} -struct devlink_sb_pool_get_list { - struct devlink_sb_pool_get_list *next; - struct devlink_sb_pool_get_rsp obj __attribute__ ((aligned (8))); -}; - -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp); - -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req); +/* + * Delete devlink port instances. 
+ */ +int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req); -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -struct devlink_sb_port_pool_get_req { +/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ +/* DEVLINK_CMD_PORT_SPLIT - do */ +struct devlink_port_split_req { struct { __u32 bus_name_len; __u32 dev_name_len; __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; + __u32 port_split_count:1; } _present; char *bus_name; char *dev_name; __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; + __u32 port_split_count; }; -static inline struct devlink_sb_port_pool_get_req * -devlink_sb_port_pool_get_req_alloc(void) +static inline struct devlink_port_split_req *devlink_port_split_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req)); + return calloc(1, sizeof(struct devlink_port_split_req)); } -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req); +void devlink_port_split_req_free(struct devlink_port_split_req *req); static inline void -devlink_sb_port_pool_get_req_set_bus_name(struct devlink_sb_port_pool_get_req *req, - const char *bus_name) +devlink_port_split_req_set_bus_name(struct devlink_port_split_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -553,8 +804,8 @@ devlink_sb_port_pool_get_req_set_bus_name(struct devlink_sb_port_pool_get_req *r req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_port_pool_get_req_set_dev_name(struct devlink_sb_port_pool_get_req *req, - const char *dev_name) +devlink_port_split_req_set_dev_name(struct devlink_port_split_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -563,75 +814,49 @@ devlink_sb_port_pool_get_req_set_dev_name(struct devlink_sb_port_pool_get_req *r req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_sb_port_pool_get_req_set_port_index(struct devlink_sb_port_pool_get_req *req, - __u32 port_index) +devlink_port_split_req_set_port_index(struct devlink_port_split_req *req, + __u32 port_index) { req->_present.port_index = 1; req->port_index = port_index; } static inline void -devlink_sb_port_pool_get_req_set_sb_index(struct devlink_sb_port_pool_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_port_pool_get_req_set_sb_pool_index(struct devlink_sb_port_pool_get_req *req, - __u16 sb_pool_index) +devlink_port_split_req_set_port_split_count(struct devlink_port_split_req *req, + __u32 port_split_count) { - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; + req->_present.port_split_count = 1; + req->port_split_count = port_split_count; } -struct devlink_sb_port_pool_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; -}; - -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp); - /* - * Get shared buffer port-pool combinations and threshold. + * Split devlink port instances. 
 */ -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req); +int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req); -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -struct devlink_sb_port_pool_get_req_dump { +/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ +/* DEVLINK_CMD_PORT_UNSPLIT - do */ +struct devlink_port_unsplit_req { struct { __u32 bus_name_len; __u32 dev_name_len; + __u32 port_index:1; } _present; char *bus_name; char *dev_name; + __u32 port_index; }; -static inline struct devlink_sb_port_pool_get_req_dump * -devlink_sb_port_pool_get_req_dump_alloc(void) +static inline struct devlink_port_unsplit_req * +devlink_port_unsplit_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req_dump)); + return calloc(1, sizeof(struct devlink_port_unsplit_req)); } -void -devlink_sb_port_pool_get_req_dump_free(struct devlink_sb_port_pool_get_req_dump *req); +void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req); static inline void -devlink_sb_port_pool_get_req_dump_set_bus_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *bus_name) +devlink_port_unsplit_req_set_bus_name(struct devlink_port_unsplit_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -640,8 +865,8 @@ devlink_sb_port_pool_get_req_dump_set_bus_name(struct devlink_sb_port_pool_get_r req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_port_pool_get_req_dump_set_dev_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *dev_name) +devlink_port_unsplit_req_set_dev_name(struct devlink_port_unsplit_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -649,50 +874,43 @@ devlink_sb_port_pool_get_req_dump_set_dev_name(struct devlink_sb_port_pool_get_r req->dev_name = malloc(req->_present.dev_name_len + 1); memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } +static inline void +devlink_port_unsplit_req_set_port_index(struct devlink_port_unsplit_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} -struct devlink_sb_port_pool_get_list { - struct devlink_sb_port_pool_get_list *next; - struct devlink_sb_port_pool_get_rsp obj __attribute__ ((aligned (8))); -}; - -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp); - -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump *req); +/* + * Unsplit devlink port instances. 
+ */ +int devlink_port_unsplit(struct ynl_sock *ys, + struct devlink_port_unsplit_req *req); -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -struct devlink_sb_tc_pool_bind_get_req { +/* ============== DEVLINK_CMD_SB_GET ============== */ +/* DEVLINK_CMD_SB_GET - do */ +struct devlink_sb_get_req { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 port_index:1; __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; } _present; char *bus_name; char *dev_name; - __u32 port_index; __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; }; -static inline struct devlink_sb_tc_pool_bind_get_req * -devlink_sb_tc_pool_bind_get_req_alloc(void) +static inline struct devlink_sb_get_req *devlink_sb_get_req_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req)); + return calloc(1, sizeof(struct devlink_sb_get_req)); } -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req); +void devlink_sb_get_req_free(struct devlink_sb_get_req *req); static inline void -devlink_sb_tc_pool_bind_get_req_set_bus_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *bus_name) +devlink_sb_get_req_set_bus_name(struct devlink_sb_get_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -701,8 +919,8 @@ devlink_sb_tc_pool_bind_get_req_set_bus_name(struct devlink_sb_tc_pool_bind_get_ req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_tc_pool_bind_get_req_set_dev_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *dev_name) +devlink_sb_get_req_set_dev_name(struct devlink_sb_get_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -711,64 +929,34 @@ devlink_sb_tc_pool_bind_get_req_set_dev_name(struct devlink_sb_tc_pool_bind_get_ req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_sb_tc_pool_bind_get_req_set_port_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 sb_index) +devlink_sb_get_req_set_sb_index(struct devlink_sb_get_req *req, __u32 sb_index) { req->_present.sb_index = 1; req->sb_index = sb_index; } -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_get_req *req, - enum devlink_sb_pool_type sb_pool_type) -{ - req->_present.sb_pool_type = 1; - req->sb_pool_type = sb_pool_type; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u16 sb_tc_index) -{ - req->_present.sb_tc_index = 1; - req->sb_tc_index = sb_tc_index; -} -struct devlink_sb_tc_pool_bind_get_rsp { +struct devlink_sb_get_rsp { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 port_index:1; __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; } _present; char *bus_name; char *dev_name; - __u32 port_index; __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; }; -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp); +void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp); /* - * Get shared buffer port-TC to pool bindings and threshold. + * Get shared buffer instances. 
*/ -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req); +struct devlink_sb_get_rsp * +devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req); -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -struct devlink_sb_tc_pool_bind_get_req_dump { +/* DEVLINK_CMD_SB_GET - dump */ +struct devlink_sb_get_req_dump { struct { __u32 bus_name_len; __u32 dev_name_len; @@ -778,17 +966,16 @@ struct devlink_sb_tc_pool_bind_get_req_dump { char *dev_name; }; -static inline struct devlink_sb_tc_pool_bind_get_req_dump * -devlink_sb_tc_pool_bind_get_req_dump_alloc(void) +static inline struct devlink_sb_get_req_dump * +devlink_sb_get_req_dump_alloc(void) { - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req_dump)); + return calloc(1, sizeof(struct devlink_sb_get_req_dump)); } -void -devlink_sb_tc_pool_bind_get_req_dump_free(struct devlink_sb_tc_pool_bind_get_req_dump *req); +void devlink_sb_get_req_dump_free(struct devlink_sb_get_req_dump *req); static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_bus_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *bus_name) +devlink_sb_get_req_dump_set_bus_name(struct devlink_sb_get_req_dump *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -797,8 +984,8 @@ devlink_sb_tc_pool_bind_get_req_dump_set_bus_name(struct devlink_sb_tc_pool_bind req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_dev_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *dev_name) +devlink_sb_get_req_dump_set_dev_name(struct devlink_sb_get_req_dump *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -807,41 +994,42 @@ devlink_sb_tc_pool_bind_get_req_dump_set_dev_name(struct devlink_sb_tc_pool_bind req->dev_name[req->_present.dev_name_len] = 0; } -struct devlink_sb_tc_pool_bind_get_list { - struct devlink_sb_tc_pool_bind_get_list *next; - struct devlink_sb_tc_pool_bind_get_rsp obj __attribute__ ((aligned (8))); +struct devlink_sb_get_list { + struct devlink_sb_get_list *next; + struct devlink_sb_get_rsp obj __attribute__ ((aligned (8))); }; -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp); +void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp); -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req); +struct devlink_sb_get_list * +devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req); -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -struct devlink_param_get_req { +/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ +/* DEVLINK_CMD_SB_POOL_GET - do */ +struct devlink_sb_pool_get_req { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 param_name_len; + __u32 sb_index:1; + __u32 sb_pool_index:1; } _present; char *bus_name; char *dev_name; - char *param_name; + __u32 sb_index; + __u16 sb_pool_index; }; -static inline struct devlink_param_get_req *devlink_param_get_req_alloc(void) +static inline struct devlink_sb_pool_get_req * +devlink_sb_pool_get_req_alloc(void) { - return calloc(1, sizeof(struct devlink_param_get_req)); + return calloc(1, sizeof(struct devlink_sb_pool_get_req)); } -void devlink_param_get_req_free(struct devlink_param_get_req *req); +void 
devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req); static inline void -devlink_param_get_req_set_bus_name(struct devlink_param_get_req *req, - const char *bus_name) +devlink_sb_pool_get_req_set_bus_name(struct devlink_sb_pool_get_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -850,8 +1038,8 @@ devlink_param_get_req_set_bus_name(struct devlink_param_get_req *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_param_get_req_set_dev_name(struct devlink_param_get_req *req, - const char *dev_name) +devlink_sb_pool_get_req_set_dev_name(struct devlink_sb_pool_get_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -860,38 +1048,44 @@ devlink_param_get_req_set_dev_name(struct devlink_param_get_req *req, req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_param_get_req_set_param_name(struct devlink_param_get_req *req, - const char *param_name) +devlink_sb_pool_get_req_set_sb_index(struct devlink_sb_pool_get_req *req, + __u32 sb_index) { - free(req->param_name); - req->_present.param_name_len = strlen(param_name); - req->param_name = malloc(req->_present.param_name_len + 1); - memcpy(req->param_name, param_name, req->_present.param_name_len); - req->param_name[req->_present.param_name_len] = 0; + req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_pool_get_req_set_sb_pool_index(struct devlink_sb_pool_get_req *req, + __u16 sb_pool_index) +{ + req->_present.sb_pool_index = 1; + req->sb_pool_index = sb_pool_index; } -struct devlink_param_get_rsp { +struct devlink_sb_pool_get_rsp { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 param_name_len; + __u32 sb_index:1; + __u32 sb_pool_index:1; } _present; char *bus_name; char *dev_name; - char *param_name; + __u32 sb_index; + __u16 sb_pool_index; }; -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp); +void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp); /* - * Get param instances. + * Get shared buffer pool instances. 
*/ -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req); +struct devlink_sb_pool_get_rsp * +devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req); -/* DEVLINK_CMD_PARAM_GET - dump */ -struct devlink_param_get_req_dump { +/* DEVLINK_CMD_SB_POOL_GET - dump */ +struct devlink_sb_pool_get_req_dump { struct { __u32 bus_name_len; __u32 dev_name_len; @@ -901,16 +1095,2307 @@ struct devlink_param_get_req_dump { char *dev_name; }; -static inline struct devlink_param_get_req_dump * -devlink_param_get_req_dump_alloc(void) +static inline struct devlink_sb_pool_get_req_dump * +devlink_sb_pool_get_req_dump_alloc(void) { - return calloc(1, sizeof(struct devlink_param_get_req_dump)); + return calloc(1, sizeof(struct devlink_sb_pool_get_req_dump)); } -void devlink_param_get_req_dump_free(struct devlink_param_get_req_dump *req); +void +devlink_sb_pool_get_req_dump_free(struct devlink_sb_pool_get_req_dump *req); + +static inline void +devlink_sb_pool_get_req_dump_set_bus_name(struct devlink_sb_pool_get_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_pool_get_req_dump_set_dev_name(struct devlink_sb_pool_get_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_sb_pool_get_list { + struct devlink_sb_pool_get_list *next; + struct devlink_sb_pool_get_rsp obj __attribute__ ((aligned (8))); +}; + +void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp); + +struct devlink_sb_pool_get_list * +devlink_sb_pool_get_dump(struct ynl_sock *ys, + struct devlink_sb_pool_get_req_dump *req); + +/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ +/* DEVLINK_CMD_SB_POOL_SET - do */ +struct devlink_sb_pool_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 sb_index:1; + __u32 sb_pool_index:1; + __u32 sb_pool_threshold_type:1; + __u32 sb_pool_size:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 sb_index; + __u16 sb_pool_index; + enum devlink_sb_threshold_type sb_pool_threshold_type; + __u32 sb_pool_size; +}; + +static inline struct devlink_sb_pool_set_req * +devlink_sb_pool_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_pool_set_req)); +} +void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req); + +static inline void +devlink_sb_pool_set_req_set_bus_name(struct devlink_sb_pool_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_pool_set_req_set_dev_name(struct devlink_sb_pool_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void 
+devlink_sb_pool_set_req_set_sb_index(struct devlink_sb_pool_set_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_pool_set_req_set_sb_pool_index(struct devlink_sb_pool_set_req *req, + __u16 sb_pool_index) +{ + req->_present.sb_pool_index = 1; + req->sb_pool_index = sb_pool_index; +} +static inline void +devlink_sb_pool_set_req_set_sb_pool_threshold_type(struct devlink_sb_pool_set_req *req, + enum devlink_sb_threshold_type sb_pool_threshold_type) +{ + req->_present.sb_pool_threshold_type = 1; + req->sb_pool_threshold_type = sb_pool_threshold_type; +} +static inline void +devlink_sb_pool_set_req_set_sb_pool_size(struct devlink_sb_pool_set_req *req, + __u32 sb_pool_size) +{ + req->_present.sb_pool_size = 1; + req->sb_pool_size = sb_pool_size; +} + +/* + * Set shared buffer pool instances. + */ +int devlink_sb_pool_set(struct ynl_sock *ys, + struct devlink_sb_pool_set_req *req); + +/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ +/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ +struct devlink_sb_port_pool_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + __u16 sb_pool_index; +}; + +static inline struct devlink_sb_port_pool_get_req * +devlink_sb_port_pool_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_port_pool_get_req)); +} +void +devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req); + +static inline void +devlink_sb_port_pool_get_req_set_bus_name(struct devlink_sb_port_pool_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_port_pool_get_req_set_dev_name(struct devlink_sb_port_pool_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_port_pool_get_req_set_port_index(struct devlink_sb_port_pool_get_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_sb_port_pool_get_req_set_sb_index(struct devlink_sb_port_pool_get_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_port_pool_get_req_set_sb_pool_index(struct devlink_sb_port_pool_get_req *req, + __u16 sb_pool_index) +{ + req->_present.sb_pool_index = 1; + req->sb_pool_index = sb_pool_index; +} + +struct devlink_sb_port_pool_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + __u16 sb_pool_index; +}; + +void +devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp); + +/* + * Get shared buffer port-pool combinations and threshold. 
+ */ +struct devlink_sb_port_pool_get_rsp * +devlink_sb_port_pool_get(struct ynl_sock *ys, + struct devlink_sb_port_pool_get_req *req); + +/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ +struct devlink_sb_port_pool_get_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_sb_port_pool_get_req_dump * +devlink_sb_port_pool_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_port_pool_get_req_dump)); +} +void +devlink_sb_port_pool_get_req_dump_free(struct devlink_sb_port_pool_get_req_dump *req); + +static inline void +devlink_sb_port_pool_get_req_dump_set_bus_name(struct devlink_sb_port_pool_get_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_port_pool_get_req_dump_set_dev_name(struct devlink_sb_port_pool_get_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_sb_port_pool_get_list { + struct devlink_sb_port_pool_get_list *next; + struct devlink_sb_port_pool_get_rsp obj __attribute__ ((aligned (8))); +}; + +void +devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp); + +struct devlink_sb_port_pool_get_list * +devlink_sb_port_pool_get_dump(struct ynl_sock *ys, + struct devlink_sb_port_pool_get_req_dump *req); + +/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ +/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ +struct devlink_sb_port_pool_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_index:1; + __u32 sb_threshold:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + __u16 sb_pool_index; + __u32 sb_threshold; +}; + +static inline struct devlink_sb_port_pool_set_req * +devlink_sb_port_pool_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_port_pool_set_req)); +} +void +devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req); + +static inline void +devlink_sb_port_pool_set_req_set_bus_name(struct devlink_sb_port_pool_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_port_pool_set_req_set_dev_name(struct devlink_sb_port_pool_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_port_pool_set_req_set_port_index(struct devlink_sb_port_pool_set_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_sb_port_pool_set_req_set_sb_index(struct devlink_sb_port_pool_set_req *req, + __u32 sb_index) +{ + 
req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_port_pool_set_req_set_sb_pool_index(struct devlink_sb_port_pool_set_req *req, + __u16 sb_pool_index) +{ + req->_present.sb_pool_index = 1; + req->sb_pool_index = sb_pool_index; +} +static inline void +devlink_sb_port_pool_set_req_set_sb_threshold(struct devlink_sb_port_pool_set_req *req, + __u32 sb_threshold) +{ + req->_present.sb_threshold = 1; + req->sb_threshold = sb_threshold; +} + +/* + * Set shared buffer port-pool combinations and threshold. + */ +int devlink_sb_port_pool_set(struct ynl_sock *ys, + struct devlink_sb_port_pool_set_req *req); + +/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ +/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ +struct devlink_sb_tc_pool_bind_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_type:1; + __u32 sb_tc_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + enum devlink_sb_pool_type sb_pool_type; + __u16 sb_tc_index; +}; + +static inline struct devlink_sb_tc_pool_bind_get_req * +devlink_sb_tc_pool_bind_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req)); +} +void +devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req); + +static inline void +devlink_sb_tc_pool_bind_get_req_set_bus_name(struct devlink_sb_tc_pool_bind_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_tc_pool_bind_get_req_set_dev_name(struct devlink_sb_tc_pool_bind_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_tc_pool_bind_get_req_set_port_index(struct devlink_sb_tc_pool_bind_get_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_sb_tc_pool_bind_get_req_set_sb_index(struct devlink_sb_tc_pool_bind_get_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_tc_pool_bind_get_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_get_req *req, + enum devlink_sb_pool_type sb_pool_type) +{ + req->_present.sb_pool_type = 1; + req->sb_pool_type = sb_pool_type; +} +static inline void +devlink_sb_tc_pool_bind_get_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_get_req *req, + __u16 sb_tc_index) +{ + req->_present.sb_tc_index = 1; + req->sb_tc_index = sb_tc_index; +} + +struct devlink_sb_tc_pool_bind_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_type:1; + __u32 sb_tc_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + enum devlink_sb_pool_type sb_pool_type; + __u16 sb_tc_index; +}; + +void +devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp); + +/* + * Get shared buffer port-TC to pool bindings and threshold. 
+ */ +struct devlink_sb_tc_pool_bind_get_rsp * +devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_get_req *req); + +/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ +struct devlink_sb_tc_pool_bind_get_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_sb_tc_pool_bind_get_req_dump * +devlink_sb_tc_pool_bind_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req_dump)); +} +void +devlink_sb_tc_pool_bind_get_req_dump_free(struct devlink_sb_tc_pool_bind_get_req_dump *req); + +static inline void +devlink_sb_tc_pool_bind_get_req_dump_set_bus_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_tc_pool_bind_get_req_dump_set_dev_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_sb_tc_pool_bind_get_list { + struct devlink_sb_tc_pool_bind_get_list *next; + struct devlink_sb_tc_pool_bind_get_rsp obj __attribute__ ((aligned (8))); +}; + +void +devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp); + +struct devlink_sb_tc_pool_bind_get_list * +devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_get_req_dump *req); + +/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ +/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ +struct devlink_sb_tc_pool_bind_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 sb_index:1; + __u32 sb_pool_index:1; + __u32 sb_pool_type:1; + __u32 sb_tc_index:1; + __u32 sb_threshold:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + __u32 sb_index; + __u16 sb_pool_index; + enum devlink_sb_pool_type sb_pool_type; + __u16 sb_tc_index; + __u32 sb_threshold; +}; + +static inline struct devlink_sb_tc_pool_bind_set_req * +devlink_sb_tc_pool_bind_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_set_req)); +} +void +devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req); + +static inline void +devlink_sb_tc_pool_bind_set_req_set_bus_name(struct devlink_sb_tc_pool_bind_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_dev_name(struct devlink_sb_tc_pool_bind_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_port_index(struct devlink_sb_tc_pool_bind_set_req *req, + 
__u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_sb_index(struct devlink_sb_tc_pool_bind_set_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_sb_pool_index(struct devlink_sb_tc_pool_bind_set_req *req, + __u16 sb_pool_index) +{ + req->_present.sb_pool_index = 1; + req->sb_pool_index = sb_pool_index; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_set_req *req, + enum devlink_sb_pool_type sb_pool_type) +{ + req->_present.sb_pool_type = 1; + req->sb_pool_type = sb_pool_type; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_set_req *req, + __u16 sb_tc_index) +{ + req->_present.sb_tc_index = 1; + req->sb_tc_index = sb_tc_index; +} +static inline void +devlink_sb_tc_pool_bind_set_req_set_sb_threshold(struct devlink_sb_tc_pool_bind_set_req *req, + __u32 sb_threshold) +{ + req->_present.sb_threshold = 1; + req->sb_threshold = sb_threshold; +} + +/* + * Set shared buffer port-TC to pool bindings and threshold. + */ +int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, + struct devlink_sb_tc_pool_bind_set_req *req); + +/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ +/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ +struct devlink_sb_occ_snapshot_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 sb_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 sb_index; +}; + +static inline struct devlink_sb_occ_snapshot_req * +devlink_sb_occ_snapshot_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_occ_snapshot_req)); +} +void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req); + +static inline void +devlink_sb_occ_snapshot_req_set_bus_name(struct devlink_sb_occ_snapshot_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_occ_snapshot_req_set_dev_name(struct devlink_sb_occ_snapshot_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_occ_snapshot_req_set_sb_index(struct devlink_sb_occ_snapshot_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} + +/* + * Take occupancy snapshot of shared buffer. 
+ */ +int devlink_sb_occ_snapshot(struct ynl_sock *ys, + struct devlink_sb_occ_snapshot_req *req); + +/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ +/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ +struct devlink_sb_occ_max_clear_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 sb_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 sb_index; +}; + +static inline struct devlink_sb_occ_max_clear_req * +devlink_sb_occ_max_clear_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_sb_occ_max_clear_req)); +} +void +devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req); + +static inline void +devlink_sb_occ_max_clear_req_set_bus_name(struct devlink_sb_occ_max_clear_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_sb_occ_max_clear_req_set_dev_name(struct devlink_sb_occ_max_clear_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_sb_occ_max_clear_req_set_sb_index(struct devlink_sb_occ_max_clear_req *req, + __u32 sb_index) +{ + req->_present.sb_index = 1; + req->sb_index = sb_index; +} + +/* + * Clear occupancy watermarks of shared buffer. + */ +int devlink_sb_occ_max_clear(struct ynl_sock *ys, + struct devlink_sb_occ_max_clear_req *req); + +/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ +/* DEVLINK_CMD_ESWITCH_GET - do */ +struct devlink_eswitch_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_eswitch_get_req * +devlink_eswitch_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_eswitch_get_req)); +} +void devlink_eswitch_get_req_free(struct devlink_eswitch_get_req *req); + +static inline void +devlink_eswitch_get_req_set_bus_name(struct devlink_eswitch_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_eswitch_get_req_set_dev_name(struct devlink_eswitch_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_eswitch_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 eswitch_mode:1; + __u32 eswitch_inline_mode:1; + __u32 eswitch_encap_mode:1; + } _present; + + char *bus_name; + char *dev_name; + enum devlink_eswitch_mode eswitch_mode; + enum devlink_eswitch_inline_mode eswitch_inline_mode; + enum devlink_eswitch_encap_mode eswitch_encap_mode; +}; + +void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp); + +/* + * Get eswitch attributes. 
+ */ +struct devlink_eswitch_get_rsp * +devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req); + +/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ +/* DEVLINK_CMD_ESWITCH_SET - do */ +struct devlink_eswitch_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 eswitch_mode:1; + __u32 eswitch_inline_mode:1; + __u32 eswitch_encap_mode:1; + } _present; + + char *bus_name; + char *dev_name; + enum devlink_eswitch_mode eswitch_mode; + enum devlink_eswitch_inline_mode eswitch_inline_mode; + enum devlink_eswitch_encap_mode eswitch_encap_mode; +}; + +static inline struct devlink_eswitch_set_req * +devlink_eswitch_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_eswitch_set_req)); +} +void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req); + +static inline void +devlink_eswitch_set_req_set_bus_name(struct devlink_eswitch_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_eswitch_set_req_set_dev_name(struct devlink_eswitch_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_eswitch_set_req_set_eswitch_mode(struct devlink_eswitch_set_req *req, + enum devlink_eswitch_mode eswitch_mode) +{ + req->_present.eswitch_mode = 1; + req->eswitch_mode = eswitch_mode; +} +static inline void +devlink_eswitch_set_req_set_eswitch_inline_mode(struct devlink_eswitch_set_req *req, + enum devlink_eswitch_inline_mode eswitch_inline_mode) +{ + req->_present.eswitch_inline_mode = 1; + req->eswitch_inline_mode = eswitch_inline_mode; +} +static inline void +devlink_eswitch_set_req_set_eswitch_encap_mode(struct devlink_eswitch_set_req *req, + enum devlink_eswitch_encap_mode eswitch_encap_mode) +{ + req->_present.eswitch_encap_mode = 1; + req->eswitch_encap_mode = eswitch_encap_mode; +} + +/* + * Set eswitch attributes. 
+ */ +int devlink_eswitch_set(struct ynl_sock *ys, + struct devlink_eswitch_set_req *req); + +/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ +/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ +struct devlink_dpipe_table_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_table_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *dpipe_table_name; +}; + +static inline struct devlink_dpipe_table_get_req * +devlink_dpipe_table_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_dpipe_table_get_req)); +} +void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req); + +static inline void +devlink_dpipe_table_get_req_set_bus_name(struct devlink_dpipe_table_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_dpipe_table_get_req_set_dev_name(struct devlink_dpipe_table_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_dpipe_table_get_req_set_dpipe_table_name(struct devlink_dpipe_table_get_req *req, + const char *dpipe_table_name) +{ + free(req->dpipe_table_name); + req->_present.dpipe_table_name_len = strlen(dpipe_table_name); + req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); + memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); + req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; +} + +struct devlink_dpipe_table_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_tables:1; + } _present; + + char *bus_name; + char *dev_name; + struct devlink_dl_dpipe_tables dpipe_tables; +}; + +void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp); + +/* + * Get dpipe table attributes. 
+ */ +struct devlink_dpipe_table_get_rsp * +devlink_dpipe_table_get(struct ynl_sock *ys, + struct devlink_dpipe_table_get_req *req); + +/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ +/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ +struct devlink_dpipe_entries_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_table_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *dpipe_table_name; +}; + +static inline struct devlink_dpipe_entries_get_req * +devlink_dpipe_entries_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_dpipe_entries_get_req)); +} +void +devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req); + +static inline void +devlink_dpipe_entries_get_req_set_bus_name(struct devlink_dpipe_entries_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_dpipe_entries_get_req_set_dev_name(struct devlink_dpipe_entries_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_dpipe_entries_get_req_set_dpipe_table_name(struct devlink_dpipe_entries_get_req *req, + const char *dpipe_table_name) +{ + free(req->dpipe_table_name); + req->_present.dpipe_table_name_len = strlen(dpipe_table_name); + req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); + memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); + req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; +} + +struct devlink_dpipe_entries_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_entries:1; + } _present; + + char *bus_name; + char *dev_name; + struct devlink_dl_dpipe_entries dpipe_entries; +}; + +void +devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp); + +/* + * Get dpipe entries attributes. 
+ */ +struct devlink_dpipe_entries_get_rsp * +devlink_dpipe_entries_get(struct ynl_sock *ys, + struct devlink_dpipe_entries_get_req *req); + +/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ +/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ +struct devlink_dpipe_headers_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_dpipe_headers_get_req * +devlink_dpipe_headers_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_dpipe_headers_get_req)); +} +void +devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req); + +static inline void +devlink_dpipe_headers_get_req_set_bus_name(struct devlink_dpipe_headers_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_dpipe_headers_get_req_set_dev_name(struct devlink_dpipe_headers_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_dpipe_headers_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_headers:1; + } _present; + + char *bus_name; + char *dev_name; + struct devlink_dl_dpipe_headers dpipe_headers; +}; + +void +devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp); + +/* + * Get dpipe headers attributes. + */ +struct devlink_dpipe_headers_get_rsp * +devlink_dpipe_headers_get(struct ynl_sock *ys, + struct devlink_dpipe_headers_get_req *req); + +/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ +/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ +struct devlink_dpipe_table_counters_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 dpipe_table_name_len; + __u32 dpipe_table_counters_enabled:1; + } _present; + + char *bus_name; + char *dev_name; + char *dpipe_table_name; + __u8 dpipe_table_counters_enabled; +}; + +static inline struct devlink_dpipe_table_counters_set_req * +devlink_dpipe_table_counters_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_dpipe_table_counters_set_req)); +} +void +devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req); + +static inline void +devlink_dpipe_table_counters_set_req_set_bus_name(struct devlink_dpipe_table_counters_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_dpipe_table_counters_set_req_set_dev_name(struct devlink_dpipe_table_counters_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_dpipe_table_counters_set_req_set_dpipe_table_name(struct devlink_dpipe_table_counters_set_req *req, + const char 
*dpipe_table_name) +{ + free(req->dpipe_table_name); + req->_present.dpipe_table_name_len = strlen(dpipe_table_name); + req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); + memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); + req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; +} +static inline void +devlink_dpipe_table_counters_set_req_set_dpipe_table_counters_enabled(struct devlink_dpipe_table_counters_set_req *req, + __u8 dpipe_table_counters_enabled) +{ + req->_present.dpipe_table_counters_enabled = 1; + req->dpipe_table_counters_enabled = dpipe_table_counters_enabled; +} + +/* + * Set dpipe counter attributes. + */ +int devlink_dpipe_table_counters_set(struct ynl_sock *ys, + struct devlink_dpipe_table_counters_set_req *req); + +/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ +/* DEVLINK_CMD_RESOURCE_SET - do */ +struct devlink_resource_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 resource_id:1; + __u32 resource_size:1; + } _present; + + char *bus_name; + char *dev_name; + __u64 resource_id; + __u64 resource_size; +}; + +static inline struct devlink_resource_set_req * +devlink_resource_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_resource_set_req)); +} +void devlink_resource_set_req_free(struct devlink_resource_set_req *req); + +static inline void +devlink_resource_set_req_set_bus_name(struct devlink_resource_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_resource_set_req_set_dev_name(struct devlink_resource_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_resource_set_req_set_resource_id(struct devlink_resource_set_req *req, + __u64 resource_id) +{ + req->_present.resource_id = 1; + req->resource_id = resource_id; +} +static inline void +devlink_resource_set_req_set_resource_size(struct devlink_resource_set_req *req, + __u64 resource_size) +{ + req->_present.resource_size = 1; + req->resource_size = resource_size; +} + +/* + * Set resource attributes. 
+ */ +int devlink_resource_set(struct ynl_sock *ys, + struct devlink_resource_set_req *req); + +/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ +/* DEVLINK_CMD_RESOURCE_DUMP - do */ +struct devlink_resource_dump_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_resource_dump_req * +devlink_resource_dump_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_resource_dump_req)); +} +void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req); + +static inline void +devlink_resource_dump_req_set_bus_name(struct devlink_resource_dump_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_resource_dump_req_set_dev_name(struct devlink_resource_dump_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_resource_dump_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 resource_list:1; + } _present; + + char *bus_name; + char *dev_name; + struct devlink_dl_resource_list resource_list; +}; + +void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp); + +/* + * Get resource attributes. + */ +struct devlink_resource_dump_rsp * +devlink_resource_dump(struct ynl_sock *ys, + struct devlink_resource_dump_req *req); + +/* ============== DEVLINK_CMD_RELOAD ============== */ +/* DEVLINK_CMD_RELOAD - do */ +struct devlink_reload_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 reload_action:1; + __u32 reload_limits:1; + __u32 netns_pid:1; + __u32 netns_fd:1; + __u32 netns_id:1; + } _present; + + char *bus_name; + char *dev_name; + enum devlink_reload_action reload_action; + struct nla_bitfield32 reload_limits; + __u32 netns_pid; + __u32 netns_fd; + __u32 netns_id; +}; + +static inline struct devlink_reload_req *devlink_reload_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_reload_req)); +} +void devlink_reload_req_free(struct devlink_reload_req *req); + +static inline void +devlink_reload_req_set_bus_name(struct devlink_reload_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_reload_req_set_dev_name(struct devlink_reload_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_reload_req_set_reload_action(struct devlink_reload_req *req, + enum devlink_reload_action reload_action) +{ + req->_present.reload_action = 1; + req->reload_action = reload_action; +} +static inline void +devlink_reload_req_set_reload_limits(struct devlink_reload_req *req, + struct nla_bitfield32 *reload_limits) +{ + 
req->_present.reload_limits = 1; + memcpy(&req->reload_limits, reload_limits, sizeof(struct nla_bitfield32)); +} +static inline void +devlink_reload_req_set_netns_pid(struct devlink_reload_req *req, + __u32 netns_pid) +{ + req->_present.netns_pid = 1; + req->netns_pid = netns_pid; +} +static inline void +devlink_reload_req_set_netns_fd(struct devlink_reload_req *req, __u32 netns_fd) +{ + req->_present.netns_fd = 1; + req->netns_fd = netns_fd; +} +static inline void +devlink_reload_req_set_netns_id(struct devlink_reload_req *req, __u32 netns_id) +{ + req->_present.netns_id = 1; + req->netns_id = netns_id; +} + +struct devlink_reload_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 reload_actions_performed:1; + } _present; + + char *bus_name; + char *dev_name; + struct nla_bitfield32 reload_actions_performed; +}; + +void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp); + +/* + * Reload devlink. + */ +struct devlink_reload_rsp * +devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req); + +/* ============== DEVLINK_CMD_PARAM_GET ============== */ +/* DEVLINK_CMD_PARAM_GET - do */ +struct devlink_param_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 param_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *param_name; +}; + +static inline struct devlink_param_get_req *devlink_param_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_param_get_req)); +} +void devlink_param_get_req_free(struct devlink_param_get_req *req); + +static inline void +devlink_param_get_req_set_bus_name(struct devlink_param_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_param_get_req_set_dev_name(struct devlink_param_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_param_get_req_set_param_name(struct devlink_param_get_req *req, + const char *param_name) +{ + free(req->param_name); + req->_present.param_name_len = strlen(param_name); + req->param_name = malloc(req->_present.param_name_len + 1); + memcpy(req->param_name, param_name, req->_present.param_name_len); + req->param_name[req->_present.param_name_len] = 0; +} + +struct devlink_param_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 param_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *param_name; +}; + +void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp); + +/* + * Get param instances. 
+ */ +struct devlink_param_get_rsp * +devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req); + +/* DEVLINK_CMD_PARAM_GET - dump */ +struct devlink_param_get_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_param_get_req_dump * +devlink_param_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_param_get_req_dump)); +} +void devlink_param_get_req_dump_free(struct devlink_param_get_req_dump *req); + +static inline void +devlink_param_get_req_dump_set_bus_name(struct devlink_param_get_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_param_get_req_dump_set_dev_name(struct devlink_param_get_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_param_get_list { + struct devlink_param_get_list *next; + struct devlink_param_get_rsp obj __attribute__ ((aligned (8))); +}; + +void devlink_param_get_list_free(struct devlink_param_get_list *rsp); + +struct devlink_param_get_list * +devlink_param_get_dump(struct ynl_sock *ys, + struct devlink_param_get_req_dump *req); + +/* ============== DEVLINK_CMD_PARAM_SET ============== */ +/* DEVLINK_CMD_PARAM_SET - do */ +struct devlink_param_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 param_name_len; + __u32 param_type:1; + __u32 param_value_cmode:1; + } _present; + + char *bus_name; + char *dev_name; + char *param_name; + __u8 param_type; + enum devlink_param_cmode param_value_cmode; +}; + +static inline struct devlink_param_set_req *devlink_param_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_param_set_req)); +} +void devlink_param_set_req_free(struct devlink_param_set_req *req); + +static inline void +devlink_param_set_req_set_bus_name(struct devlink_param_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_param_set_req_set_dev_name(struct devlink_param_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_param_set_req_set_param_name(struct devlink_param_set_req *req, + const char *param_name) +{ + free(req->param_name); + req->_present.param_name_len = strlen(param_name); + req->param_name = malloc(req->_present.param_name_len + 1); + memcpy(req->param_name, param_name, req->_present.param_name_len); + req->param_name[req->_present.param_name_len] = 0; +} +static inline void +devlink_param_set_req_set_param_type(struct devlink_param_set_req *req, + __u8 param_type) +{ + req->_present.param_type = 1; + req->param_type = param_type; +} +static inline 
void +devlink_param_set_req_set_param_value_cmode(struct devlink_param_set_req *req, + enum devlink_param_cmode param_value_cmode) +{ + req->_present.param_value_cmode = 1; + req->param_value_cmode = param_value_cmode; +} + +/* + * Set param instances. + */ +int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req); + +/* ============== DEVLINK_CMD_REGION_GET ============== */ +/* DEVLINK_CMD_REGION_GET - do */ +struct devlink_region_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; +}; + +static inline struct devlink_region_get_req *devlink_region_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_region_get_req)); +} +void devlink_region_get_req_free(struct devlink_region_get_req *req); + +static inline void +devlink_region_get_req_set_bus_name(struct devlink_region_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_region_get_req_set_dev_name(struct devlink_region_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_region_get_req_set_port_index(struct devlink_region_get_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_region_get_req_set_region_name(struct devlink_region_get_req *req, + const char *region_name) +{ + free(req->region_name); + req->_present.region_name_len = strlen(region_name); + req->region_name = malloc(req->_present.region_name_len + 1); + memcpy(req->region_name, region_name, req->_present.region_name_len); + req->region_name[req->_present.region_name_len] = 0; +} + +struct devlink_region_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; +}; + +void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp); + +/* + * Get region instances. 
+ */ +struct devlink_region_get_rsp * +devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req); + +/* DEVLINK_CMD_REGION_GET - dump */ +struct devlink_region_get_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_region_get_req_dump * +devlink_region_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_region_get_req_dump)); +} +void devlink_region_get_req_dump_free(struct devlink_region_get_req_dump *req); + +static inline void +devlink_region_get_req_dump_set_bus_name(struct devlink_region_get_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_region_get_req_dump_set_dev_name(struct devlink_region_get_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_region_get_list { + struct devlink_region_get_list *next; + struct devlink_region_get_rsp obj __attribute__ ((aligned (8))); +}; + +void devlink_region_get_list_free(struct devlink_region_get_list *rsp); + +struct devlink_region_get_list * +devlink_region_get_dump(struct ynl_sock *ys, + struct devlink_region_get_req_dump *req); + +/* ============== DEVLINK_CMD_REGION_NEW ============== */ +/* DEVLINK_CMD_REGION_NEW - do */ +struct devlink_region_new_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + __u32 region_snapshot_id:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; + __u32 region_snapshot_id; +}; + +static inline struct devlink_region_new_req *devlink_region_new_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_region_new_req)); +} +void devlink_region_new_req_free(struct devlink_region_new_req *req); + +static inline void +devlink_region_new_req_set_bus_name(struct devlink_region_new_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_region_new_req_set_dev_name(struct devlink_region_new_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_region_new_req_set_port_index(struct devlink_region_new_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_region_new_req_set_region_name(struct devlink_region_new_req *req, + const char *region_name) +{ + free(req->region_name); + req->_present.region_name_len = strlen(region_name); + req->region_name = malloc(req->_present.region_name_len + 1); + memcpy(req->region_name, region_name, req->_present.region_name_len); + 
req->region_name[req->_present.region_name_len] = 0; +} +static inline void +devlink_region_new_req_set_region_snapshot_id(struct devlink_region_new_req *req, + __u32 region_snapshot_id) +{ + req->_present.region_snapshot_id = 1; + req->region_snapshot_id = region_snapshot_id; +} + +struct devlink_region_new_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + __u32 region_snapshot_id:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; + __u32 region_snapshot_id; +}; + +void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp); + +/* + * Create region snapshot. + */ +struct devlink_region_new_rsp * +devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req); + +/* ============== DEVLINK_CMD_REGION_DEL ============== */ +/* DEVLINK_CMD_REGION_DEL - do */ +struct devlink_region_del_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + __u32 region_snapshot_id:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; + __u32 region_snapshot_id; +}; + +static inline struct devlink_region_del_req *devlink_region_del_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_region_del_req)); +} +void devlink_region_del_req_free(struct devlink_region_del_req *req); + +static inline void +devlink_region_del_req_set_bus_name(struct devlink_region_del_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_region_del_req_set_dev_name(struct devlink_region_del_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_region_del_req_set_port_index(struct devlink_region_del_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_region_del_req_set_region_name(struct devlink_region_del_req *req, + const char *region_name) +{ + free(req->region_name); + req->_present.region_name_len = strlen(region_name); + req->region_name = malloc(req->_present.region_name_len + 1); + memcpy(req->region_name, region_name, req->_present.region_name_len); + req->region_name[req->_present.region_name_len] = 0; +} +static inline void +devlink_region_del_req_set_region_snapshot_id(struct devlink_region_del_req *req, + __u32 region_snapshot_id) +{ + req->_present.region_snapshot_id = 1; + req->region_snapshot_id = region_snapshot_id; +} + +/* + * Delete region snapshot. 
+ */ +int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req); + +/* ============== DEVLINK_CMD_REGION_READ ============== */ +/* DEVLINK_CMD_REGION_READ - dump */ +struct devlink_region_read_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + __u32 region_snapshot_id:1; + __u32 region_direct:1; + __u32 region_chunk_addr:1; + __u32 region_chunk_len:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; + __u32 region_snapshot_id; + __u64 region_chunk_addr; + __u64 region_chunk_len; +}; + +static inline struct devlink_region_read_req_dump * +devlink_region_read_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_region_read_req_dump)); +} +void +devlink_region_read_req_dump_free(struct devlink_region_read_req_dump *req); + +static inline void +devlink_region_read_req_dump_set_bus_name(struct devlink_region_read_req_dump *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_region_read_req_dump_set_dev_name(struct devlink_region_read_req_dump *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_region_read_req_dump_set_port_index(struct devlink_region_read_req_dump *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_region_read_req_dump_set_region_name(struct devlink_region_read_req_dump *req, + const char *region_name) +{ + free(req->region_name); + req->_present.region_name_len = strlen(region_name); + req->region_name = malloc(req->_present.region_name_len + 1); + memcpy(req->region_name, region_name, req->_present.region_name_len); + req->region_name[req->_present.region_name_len] = 0; +} +static inline void +devlink_region_read_req_dump_set_region_snapshot_id(struct devlink_region_read_req_dump *req, + __u32 region_snapshot_id) +{ + req->_present.region_snapshot_id = 1; + req->region_snapshot_id = region_snapshot_id; +} +static inline void +devlink_region_read_req_dump_set_region_direct(struct devlink_region_read_req_dump *req) +{ + req->_present.region_direct = 1; +} +static inline void +devlink_region_read_req_dump_set_region_chunk_addr(struct devlink_region_read_req_dump *req, + __u64 region_chunk_addr) +{ + req->_present.region_chunk_addr = 1; + req->region_chunk_addr = region_chunk_addr; +} +static inline void +devlink_region_read_req_dump_set_region_chunk_len(struct devlink_region_read_req_dump *req, + __u64 region_chunk_len) +{ + req->_present.region_chunk_len = 1; + req->region_chunk_len = region_chunk_len; +} + +struct devlink_region_read_rsp_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 region_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *region_name; +}; + +struct devlink_region_read_rsp_list { + struct devlink_region_read_rsp_list *next; + struct devlink_region_read_rsp_dump obj __attribute__((aligned(8))); +}; + +void +devlink_region_read_rsp_list_free(struct 
devlink_region_read_rsp_list *rsp); + +struct devlink_region_read_rsp_list * +devlink_region_read_dump(struct ynl_sock *ys, + struct devlink_region_read_req_dump *req); + +/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ +/* DEVLINK_CMD_PORT_PARAM_GET - do */ +struct devlink_port_param_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; +}; + +static inline struct devlink_port_param_get_req * +devlink_port_param_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_port_param_get_req)); +} +void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req); + +static inline void +devlink_port_param_get_req_set_bus_name(struct devlink_port_param_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_port_param_get_req_set_dev_name(struct devlink_port_param_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_port_param_get_req_set_port_index(struct devlink_port_param_get_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} + +struct devlink_port_param_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; +}; + +void devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp); + +/* + * Get port param instances. 
+ */ +struct devlink_port_param_get_rsp * +devlink_port_param_get(struct ynl_sock *ys, + struct devlink_port_param_get_req *req); + +/* DEVLINK_CMD_PORT_PARAM_GET - dump */ +struct devlink_port_param_get_list { + struct devlink_port_param_get_list *next; + struct devlink_port_param_get_rsp obj __attribute__((aligned(8))); +}; + +void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp); + +struct devlink_port_param_get_list * +devlink_port_param_get_dump(struct ynl_sock *ys); + +/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */ +/* DEVLINK_CMD_PORT_PARAM_SET - do */ +struct devlink_port_param_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; +}; + +static inline struct devlink_port_param_set_req * +devlink_port_param_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_port_param_set_req)); +} +void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req); + +static inline void +devlink_port_param_set_req_set_bus_name(struct devlink_port_param_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_port_param_set_req_set_dev_name(struct devlink_port_param_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_port_param_set_req_set_port_index(struct devlink_port_param_set_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} + +/* + * Set port param instances. 
+ */ +int devlink_port_param_set(struct ynl_sock *ys, + struct devlink_port_param_set_req *req); + +/* ============== DEVLINK_CMD_INFO_GET ============== */ +/* DEVLINK_CMD_INFO_GET - do */ +struct devlink_info_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_info_get_req)); +} +void devlink_info_get_req_free(struct devlink_info_get_req *req); + +static inline void +devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_info_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 info_driver_name_len; + __u32 info_serial_number_len; + } _present; + + char *bus_name; + char *dev_name; + char *info_driver_name; + char *info_serial_number; + unsigned int n_info_version_fixed; + struct devlink_dl_info_version *info_version_fixed; + unsigned int n_info_version_running; + struct devlink_dl_info_version *info_version_running; + unsigned int n_info_version_stored; + struct devlink_dl_info_version *info_version_stored; +}; + +void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp); + +/* + * Get device information, like driver name, hardware and firmware versions etc. 
+ */ +struct devlink_info_get_rsp * +devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req); + +/* DEVLINK_CMD_INFO_GET - dump */ +struct devlink_info_get_list { + struct devlink_info_get_list *next; + struct devlink_info_get_rsp obj __attribute__ ((aligned (8))); +}; + +void devlink_info_get_list_free(struct devlink_info_get_list *rsp); + +struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys); + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ +struct devlink_health_reporter_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 health_reporter_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *health_reporter_name; +}; + +static inline struct devlink_health_reporter_get_req * +devlink_health_reporter_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_health_reporter_get_req)); +} +void +devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req); + +static inline void +devlink_health_reporter_get_req_set_bus_name(struct devlink_health_reporter_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_health_reporter_get_req_set_dev_name(struct devlink_health_reporter_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_health_reporter_get_req_set_port_index(struct devlink_health_reporter_get_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_health_reporter_get_req_set_health_reporter_name(struct devlink_health_reporter_get_req *req, + const char *health_reporter_name) +{ + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} + +struct devlink_health_reporter_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 health_reporter_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *health_reporter_name; +}; + +void +devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp); + +/* + * Get health reporter instances. 
+ */ +struct devlink_health_reporter_get_rsp * +devlink_health_reporter_get(struct ynl_sock *ys, + struct devlink_health_reporter_get_req *req); + +/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ +struct devlink_health_reporter_get_req_dump { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; +}; + +static inline struct devlink_health_reporter_get_req_dump * +devlink_health_reporter_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct devlink_health_reporter_get_req_dump)); +} +void +devlink_health_reporter_get_req_dump_free(struct devlink_health_reporter_get_req_dump *req); static inline void -devlink_param_get_req_dump_set_bus_name(struct devlink_param_get_req_dump *req, - const char *bus_name) +devlink_health_reporter_get_req_dump_set_bus_name(struct devlink_health_reporter_get_req_dump *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -919,8 +3404,8 @@ devlink_param_get_req_dump_set_bus_name(struct devlink_param_get_req_dump *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_param_get_req_dump_set_dev_name(struct devlink_param_get_req_dump *req, - const char *dev_name) +devlink_health_reporter_get_req_dump_set_dev_name(struct devlink_health_reporter_get_req_dump *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -928,43 +3413,59 @@ devlink_param_get_req_dump_set_dev_name(struct devlink_param_get_req_dump *req, memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } +static inline void +devlink_health_reporter_get_req_dump_set_port_index(struct devlink_health_reporter_get_req_dump *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} -struct devlink_param_get_list { - struct devlink_param_get_list *next; - struct devlink_param_get_rsp obj __attribute__ ((aligned (8))); +struct devlink_health_reporter_get_list { + struct devlink_health_reporter_get_list *next; + struct devlink_health_reporter_get_rsp obj __attribute__ ((aligned (8))); }; -void devlink_param_get_list_free(struct devlink_param_get_list *rsp); +void +devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp); -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req); +struct devlink_health_reporter_get_list * +devlink_health_reporter_get_dump(struct ynl_sock *ys, + struct devlink_health_reporter_get_req_dump *req); -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -struct devlink_region_get_req { +/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ +struct devlink_health_reporter_set_req { struct { __u32 bus_name_len; __u32 dev_name_len; __u32 port_index:1; - __u32 region_name_len; + __u32 health_reporter_name_len; + __u32 health_reporter_graceful_period:1; + __u32 health_reporter_auto_recover:1; + __u32 health_reporter_auto_dump:1; } _present; char *bus_name; char *dev_name; __u32 port_index; - char *region_name; + char *health_reporter_name; + __u64 health_reporter_graceful_period; + __u8 health_reporter_auto_recover; + __u8 health_reporter_auto_dump; }; -static inline struct devlink_region_get_req *devlink_region_get_req_alloc(void) +static inline struct devlink_health_reporter_set_req * 
+devlink_health_reporter_set_req_alloc(void) { - return calloc(1, sizeof(struct devlink_region_get_req)); + return calloc(1, sizeof(struct devlink_health_reporter_set_req)); } -void devlink_region_get_req_free(struct devlink_region_get_req *req); +void +devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req); static inline void -devlink_region_get_req_set_bus_name(struct devlink_region_get_req *req, - const char *bus_name) +devlink_health_reporter_set_req_set_bus_name(struct devlink_health_reporter_set_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -973,8 +3474,8 @@ devlink_region_get_req_set_bus_name(struct devlink_region_get_req *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_region_get_req_set_dev_name(struct devlink_region_get_req *req, - const char *dev_name) +devlink_health_reporter_set_req_set_dev_name(struct devlink_health_reporter_set_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -983,66 +3484,77 @@ devlink_region_get_req_set_dev_name(struct devlink_region_get_req *req, req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_region_get_req_set_port_index(struct devlink_region_get_req *req, - __u32 port_index) +devlink_health_reporter_set_req_set_port_index(struct devlink_health_reporter_set_req *req, + __u32 port_index) { req->_present.port_index = 1; req->port_index = port_index; } static inline void -devlink_region_get_req_set_region_name(struct devlink_region_get_req *req, - const char *region_name) +devlink_health_reporter_set_req_set_health_reporter_name(struct devlink_health_reporter_set_req *req, + const char *health_reporter_name) { - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} +static inline void +devlink_health_reporter_set_req_set_health_reporter_graceful_period(struct devlink_health_reporter_set_req *req, + __u64 health_reporter_graceful_period) +{ + req->_present.health_reporter_graceful_period = 1; + req->health_reporter_graceful_period = health_reporter_graceful_period; +} +static inline void +devlink_health_reporter_set_req_set_health_reporter_auto_recover(struct devlink_health_reporter_set_req *req, + __u8 health_reporter_auto_recover) +{ + req->_present.health_reporter_auto_recover = 1; + req->health_reporter_auto_recover = health_reporter_auto_recover; +} +static inline void +devlink_health_reporter_set_req_set_health_reporter_auto_dump(struct devlink_health_reporter_set_req *req, + __u8 health_reporter_auto_dump) +{ + req->_present.health_reporter_auto_dump = 1; + req->health_reporter_auto_dump = health_reporter_auto_dump; } - -struct devlink_region_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -void 
devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp); /* - * Get region instances. + * Set health reporter instances. */ -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req); +int devlink_health_reporter_set(struct ynl_sock *ys, + struct devlink_health_reporter_set_req *req); -/* DEVLINK_CMD_REGION_GET - dump */ -struct devlink_region_get_req_dump { +/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ +struct devlink_health_reporter_recover_req { struct { __u32 bus_name_len; __u32 dev_name_len; + __u32 port_index:1; + __u32 health_reporter_name_len; } _present; char *bus_name; char *dev_name; + __u32 port_index; + char *health_reporter_name; }; -static inline struct devlink_region_get_req_dump * -devlink_region_get_req_dump_alloc(void) +static inline struct devlink_health_reporter_recover_req * +devlink_health_reporter_recover_req_alloc(void) { - return calloc(1, sizeof(struct devlink_region_get_req_dump)); + return calloc(1, sizeof(struct devlink_health_reporter_recover_req)); } -void devlink_region_get_req_dump_free(struct devlink_region_get_req_dump *req); +void +devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req); static inline void -devlink_region_get_req_dump_set_bus_name(struct devlink_region_get_req_dump *req, - const char *bus_name) +devlink_health_reporter_recover_req_set_bus_name(struct devlink_health_reporter_recover_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -1051,8 +3563,8 @@ devlink_region_get_req_dump_set_bus_name(struct devlink_region_get_req_dump *req req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_region_get_req_dump_set_dev_name(struct devlink_region_get_req_dump *req, - const char *dev_name) +devlink_health_reporter_recover_req_set_dev_name(struct devlink_health_reporter_recover_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -1060,39 +3572,57 @@ devlink_region_get_req_dump_set_dev_name(struct devlink_region_get_req_dump *req memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } +static inline void +devlink_health_reporter_recover_req_set_port_index(struct devlink_health_reporter_recover_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_health_reporter_recover_req_set_health_reporter_name(struct devlink_health_reporter_recover_req *req, + const char *health_reporter_name) +{ + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} -struct devlink_region_get_list { - struct devlink_region_get_list *next; - struct devlink_region_get_rsp obj __attribute__ ((aligned (8))); -}; - -void devlink_region_get_list_free(struct devlink_region_get_list *rsp); - -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req); +/* + * Recover health reporter instances. 
+ */ +int devlink_health_reporter_recover(struct ynl_sock *ys, + struct devlink_health_reporter_recover_req *req); -/* ============== DEVLINK_CMD_INFO_GET ============== */ -/* DEVLINK_CMD_INFO_GET - do */ -struct devlink_info_get_req { +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ +struct devlink_health_reporter_diagnose_req { struct { __u32 bus_name_len; __u32 dev_name_len; + __u32 port_index:1; + __u32 health_reporter_name_len; } _present; char *bus_name; char *dev_name; + __u32 port_index; + char *health_reporter_name; }; -static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void) +static inline struct devlink_health_reporter_diagnose_req * +devlink_health_reporter_diagnose_req_alloc(void) { - return calloc(1, sizeof(struct devlink_info_get_req)); + return calloc(1, sizeof(struct devlink_health_reporter_diagnose_req)); } -void devlink_info_get_req_free(struct devlink_info_get_req *req); +void +devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req); static inline void -devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req, - const char *bus_name) +devlink_health_reporter_diagnose_req_set_bus_name(struct devlink_health_reporter_diagnose_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -1101,8 +3631,8 @@ devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req, req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req, - const char *dev_name) +devlink_health_reporter_diagnose_req_set_dev_name(struct devlink_health_reporter_diagnose_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -1110,48 +3640,33 @@ devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req, memcpy(req->dev_name, dev_name, req->_present.dev_name_len); req->dev_name[req->_present.dev_name_len] = 0; } - -struct devlink_info_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 info_driver_name_len; - __u32 info_serial_number_len; - } _present; - - char *bus_name; - char *dev_name; - char *info_driver_name; - char *info_serial_number; - unsigned int n_info_version_fixed; - struct devlink_dl_info_version *info_version_fixed; - unsigned int n_info_version_running; - struct devlink_dl_info_version *info_version_running; - unsigned int n_info_version_stored; - struct devlink_dl_info_version *info_version_stored; -}; - -void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp); +static inline void +devlink_health_reporter_diagnose_req_set_port_index(struct devlink_health_reporter_diagnose_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_health_reporter_diagnose_req_set_health_reporter_name(struct devlink_health_reporter_diagnose_req *req, + const char *health_reporter_name) +{ + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} /* - * Get device information, like driver name, hardware and firmware versions etc. + * Diagnose health reporter instances. 
*/ -struct devlink_info_get_rsp * -devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req); - -/* DEVLINK_CMD_INFO_GET - dump */ -struct devlink_info_get_list { - struct devlink_info_get_list *next; - struct devlink_info_get_rsp obj __attribute__ ((aligned (8))); -}; - -void devlink_info_get_list_free(struct devlink_info_get_list *rsp); - -struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys); +int devlink_health_reporter_diagnose(struct ynl_sock *ys, + struct devlink_health_reporter_diagnose_req *req); -/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -struct devlink_health_reporter_get_req { +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ +struct devlink_health_reporter_dump_get_req_dump { struct { __u32 bus_name_len; __u32 dev_name_len; @@ -1165,17 +3680,17 @@ struct devlink_health_reporter_get_req { char *health_reporter_name; }; -static inline struct devlink_health_reporter_get_req * -devlink_health_reporter_get_req_alloc(void) +static inline struct devlink_health_reporter_dump_get_req_dump * +devlink_health_reporter_dump_get_req_dump_alloc(void) { - return calloc(1, sizeof(struct devlink_health_reporter_get_req)); + return calloc(1, sizeof(struct devlink_health_reporter_dump_get_req_dump)); } void -devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req); +devlink_health_reporter_dump_get_req_dump_free(struct devlink_health_reporter_dump_get_req_dump *req); static inline void -devlink_health_reporter_get_req_set_bus_name(struct devlink_health_reporter_get_req *req, - const char *bus_name) +devlink_health_reporter_dump_get_req_dump_set_bus_name(struct devlink_health_reporter_dump_get_req_dump *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -1184,8 +3699,8 @@ devlink_health_reporter_get_req_set_bus_name(struct devlink_health_reporter_get_ req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_health_reporter_get_req_set_dev_name(struct devlink_health_reporter_get_req *req, - const char *dev_name) +devlink_health_reporter_dump_get_req_dump_set_dev_name(struct devlink_health_reporter_dump_get_req_dump *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -1194,15 +3709,15 @@ devlink_health_reporter_get_req_set_dev_name(struct devlink_health_reporter_get_ req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_health_reporter_get_req_set_port_index(struct devlink_health_reporter_get_req *req, - __u32 port_index) +devlink_health_reporter_dump_get_req_dump_set_port_index(struct devlink_health_reporter_dump_get_req_dump *req, + __u32 port_index) { req->_present.port_index = 1; req->port_index = port_index; } static inline void -devlink_health_reporter_get_req_set_health_reporter_name(struct devlink_health_reporter_get_req *req, - const char *health_reporter_name) +devlink_health_reporter_dump_get_req_dump_set_health_reporter_name(struct devlink_health_reporter_dump_get_req_dump *req, + const char *health_reporter_name) { free(req->health_reporter_name); req->_present.health_reporter_name_len = strlen(health_reporter_name); @@ -1211,7 +3726,29 @@ devlink_health_reporter_get_req_set_health_reporter_name(struct devlink_health_r req->health_reporter_name[req->_present.health_reporter_name_len] = 0; } -struct devlink_health_reporter_get_rsp { +struct 
devlink_health_reporter_dump_get_rsp_dump { + struct { + __u32 fmsg:1; + } _present; + + struct devlink_dl_fmsg fmsg; +}; + +struct devlink_health_reporter_dump_get_rsp_list { + struct devlink_health_reporter_dump_get_rsp_list *next; + struct devlink_health_reporter_dump_get_rsp_dump obj __attribute__((aligned(8))); +}; + +void +devlink_health_reporter_dump_get_rsp_list_free(struct devlink_health_reporter_dump_get_rsp_list *rsp); + +struct devlink_health_reporter_dump_get_rsp_list * +devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, + struct devlink_health_reporter_dump_get_req_dump *req); + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ +struct devlink_health_reporter_dump_clear_req { struct { __u32 bus_name_len; __u32 dev_name_len; @@ -1225,40 +3762,86 @@ struct devlink_health_reporter_get_rsp { char *health_reporter_name; }; +static inline struct devlink_health_reporter_dump_clear_req * +devlink_health_reporter_dump_clear_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_health_reporter_dump_clear_req)); +} void -devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp); +devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req); + +static inline void +devlink_health_reporter_dump_clear_req_set_bus_name(struct devlink_health_reporter_dump_clear_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_health_reporter_dump_clear_req_set_dev_name(struct devlink_health_reporter_dump_clear_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_health_reporter_dump_clear_req_set_port_index(struct devlink_health_reporter_dump_clear_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_health_reporter_dump_clear_req_set_health_reporter_name(struct devlink_health_reporter_dump_clear_req *req, + const char *health_reporter_name) +{ + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} /* - * Get health reporter instances. + * Clear dump of health reporter instances. 
*/ -struct devlink_health_reporter_get_rsp * -devlink_health_reporter_get(struct ynl_sock *ys, - struct devlink_health_reporter_get_req *req); +int devlink_health_reporter_dump_clear(struct ynl_sock *ys, + struct devlink_health_reporter_dump_clear_req *req); -/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -struct devlink_health_reporter_get_req_dump { +/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ +/* DEVLINK_CMD_FLASH_UPDATE - do */ +struct devlink_flash_update_req { struct { __u32 bus_name_len; __u32 dev_name_len; - __u32 port_index:1; + __u32 flash_update_file_name_len; + __u32 flash_update_component_len; + __u32 flash_update_overwrite_mask:1; } _present; char *bus_name; char *dev_name; - __u32 port_index; + char *flash_update_file_name; + char *flash_update_component; + struct nla_bitfield32 flash_update_overwrite_mask; }; -static inline struct devlink_health_reporter_get_req_dump * -devlink_health_reporter_get_req_dump_alloc(void) +static inline struct devlink_flash_update_req * +devlink_flash_update_req_alloc(void) { - return calloc(1, sizeof(struct devlink_health_reporter_get_req_dump)); + return calloc(1, sizeof(struct devlink_flash_update_req)); } -void -devlink_health_reporter_get_req_dump_free(struct devlink_health_reporter_get_req_dump *req); +void devlink_flash_update_req_free(struct devlink_flash_update_req *req); static inline void -devlink_health_reporter_get_req_dump_set_bus_name(struct devlink_health_reporter_get_req_dump *req, - const char *bus_name) +devlink_flash_update_req_set_bus_name(struct devlink_flash_update_req *req, + const char *bus_name) { free(req->bus_name); req->_present.bus_name_len = strlen(bus_name); @@ -1267,8 +3850,8 @@ devlink_health_reporter_get_req_dump_set_bus_name(struct devlink_health_reporter req->bus_name[req->_present.bus_name_len] = 0; } static inline void -devlink_health_reporter_get_req_dump_set_dev_name(struct devlink_health_reporter_get_req_dump *req, - const char *dev_name) +devlink_flash_update_req_set_dev_name(struct devlink_flash_update_req *req, + const char *dev_name) { free(req->dev_name); req->_present.dev_name_len = strlen(dev_name); @@ -1277,24 +3860,38 @@ devlink_health_reporter_get_req_dump_set_dev_name(struct devlink_health_reporter req->dev_name[req->_present.dev_name_len] = 0; } static inline void -devlink_health_reporter_get_req_dump_set_port_index(struct devlink_health_reporter_get_req_dump *req, - __u32 port_index) +devlink_flash_update_req_set_flash_update_file_name(struct devlink_flash_update_req *req, + const char *flash_update_file_name) { - req->_present.port_index = 1; - req->port_index = port_index; + free(req->flash_update_file_name); + req->_present.flash_update_file_name_len = strlen(flash_update_file_name); + req->flash_update_file_name = malloc(req->_present.flash_update_file_name_len + 1); + memcpy(req->flash_update_file_name, flash_update_file_name, req->_present.flash_update_file_name_len); + req->flash_update_file_name[req->_present.flash_update_file_name_len] = 0; +} +static inline void +devlink_flash_update_req_set_flash_update_component(struct devlink_flash_update_req *req, + const char *flash_update_component) +{ + free(req->flash_update_component); + req->_present.flash_update_component_len = strlen(flash_update_component); + req->flash_update_component = malloc(req->_present.flash_update_component_len + 1); + memcpy(req->flash_update_component, flash_update_component, req->_present.flash_update_component_len); + req->flash_update_component[req->_present.flash_update_component_len] = 0; 
+} +static inline void +devlink_flash_update_req_set_flash_update_overwrite_mask(struct devlink_flash_update_req *req, + struct nla_bitfield32 *flash_update_overwrite_mask) +{ + req->_present.flash_update_overwrite_mask = 1; + memcpy(&req->flash_update_overwrite_mask, flash_update_overwrite_mask, sizeof(struct nla_bitfield32)); } -struct devlink_health_reporter_get_list { - struct devlink_health_reporter_get_list *next; - struct devlink_health_reporter_get_rsp obj __attribute__ ((aligned (8))); -}; - -void -devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp); - -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req); +/* + * Flash update devlink instances. + */ +int devlink_flash_update(struct ynl_sock *ys, + struct devlink_flash_update_req *req); /* ============== DEVLINK_CMD_TRAP_GET ============== */ /* DEVLINK_CMD_TRAP_GET - do */ @@ -1417,6 +4014,71 @@ struct devlink_trap_get_list * devlink_trap_get_dump(struct ynl_sock *ys, struct devlink_trap_get_req_dump *req); +/* ============== DEVLINK_CMD_TRAP_SET ============== */ +/* DEVLINK_CMD_TRAP_SET - do */ +struct devlink_trap_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 trap_name_len; + __u32 trap_action:1; + } _present; + + char *bus_name; + char *dev_name; + char *trap_name; + enum devlink_trap_action trap_action; +}; + +static inline struct devlink_trap_set_req *devlink_trap_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_trap_set_req)); +} +void devlink_trap_set_req_free(struct devlink_trap_set_req *req); + +static inline void +devlink_trap_set_req_set_bus_name(struct devlink_trap_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_trap_set_req_set_dev_name(struct devlink_trap_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_trap_set_req_set_trap_name(struct devlink_trap_set_req *req, + const char *trap_name) +{ + free(req->trap_name); + req->_present.trap_name_len = strlen(trap_name); + req->trap_name = malloc(req->_present.trap_name_len + 1); + memcpy(req->trap_name, trap_name, req->_present.trap_name_len); + req->trap_name[req->_present.trap_name_len] = 0; +} +static inline void +devlink_trap_set_req_set_trap_action(struct devlink_trap_set_req *req, + enum devlink_trap_action trap_action) +{ + req->_present.trap_action = 1; + req->trap_action = trap_action; +} + +/* + * Set trap instances. 
+ */ +int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req); + /* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ /* DEVLINK_CMD_TRAP_GROUP_GET - do */ struct devlink_trap_group_get_req { @@ -1541,6 +4203,82 @@ struct devlink_trap_group_get_list * devlink_trap_group_get_dump(struct ynl_sock *ys, struct devlink_trap_group_get_req_dump *req); +/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ +/* DEVLINK_CMD_TRAP_GROUP_SET - do */ +struct devlink_trap_group_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 trap_group_name_len; + __u32 trap_action:1; + __u32 trap_policer_id:1; + } _present; + + char *bus_name; + char *dev_name; + char *trap_group_name; + enum devlink_trap_action trap_action; + __u32 trap_policer_id; +}; + +static inline struct devlink_trap_group_set_req * +devlink_trap_group_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_trap_group_set_req)); +} +void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req); + +static inline void +devlink_trap_group_set_req_set_bus_name(struct devlink_trap_group_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_trap_group_set_req_set_dev_name(struct devlink_trap_group_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_trap_group_set_req_set_trap_group_name(struct devlink_trap_group_set_req *req, + const char *trap_group_name) +{ + free(req->trap_group_name); + req->_present.trap_group_name_len = strlen(trap_group_name); + req->trap_group_name = malloc(req->_present.trap_group_name_len + 1); + memcpy(req->trap_group_name, trap_group_name, req->_present.trap_group_name_len); + req->trap_group_name[req->_present.trap_group_name_len] = 0; +} +static inline void +devlink_trap_group_set_req_set_trap_action(struct devlink_trap_group_set_req *req, + enum devlink_trap_action trap_action) +{ + req->_present.trap_action = 1; + req->trap_action = trap_action; +} +static inline void +devlink_trap_group_set_req_set_trap_policer_id(struct devlink_trap_group_set_req *req, + __u32 trap_policer_id) +{ + req->_present.trap_policer_id = 1; + req->trap_policer_id = trap_policer_id; +} + +/* + * Set trap group instances. 
+ */ +int devlink_trap_group_set(struct ynl_sock *ys, + struct devlink_trap_group_set_req *req); + /* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ /* DEVLINK_CMD_TRAP_POLICER_GET - do */ struct devlink_trap_policer_get_req { @@ -1661,9 +4399,151 @@ struct devlink_trap_policer_get_list { void devlink_trap_policer_get_list_free(struct devlink_trap_policer_get_list *rsp); -struct devlink_trap_policer_get_list * -devlink_trap_policer_get_dump(struct ynl_sock *ys, - struct devlink_trap_policer_get_req_dump *req); +struct devlink_trap_policer_get_list * +devlink_trap_policer_get_dump(struct ynl_sock *ys, + struct devlink_trap_policer_get_req_dump *req); + +/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ +/* DEVLINK_CMD_TRAP_POLICER_SET - do */ +struct devlink_trap_policer_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 trap_policer_id:1; + __u32 trap_policer_rate:1; + __u32 trap_policer_burst:1; + } _present; + + char *bus_name; + char *dev_name; + __u32 trap_policer_id; + __u64 trap_policer_rate; + __u64 trap_policer_burst; +}; + +static inline struct devlink_trap_policer_set_req * +devlink_trap_policer_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_trap_policer_set_req)); +} +void +devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req); + +static inline void +devlink_trap_policer_set_req_set_bus_name(struct devlink_trap_policer_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_trap_policer_set_req_set_dev_name(struct devlink_trap_policer_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_trap_policer_set_req_set_trap_policer_id(struct devlink_trap_policer_set_req *req, + __u32 trap_policer_id) +{ + req->_present.trap_policer_id = 1; + req->trap_policer_id = trap_policer_id; +} +static inline void +devlink_trap_policer_set_req_set_trap_policer_rate(struct devlink_trap_policer_set_req *req, + __u64 trap_policer_rate) +{ + req->_present.trap_policer_rate = 1; + req->trap_policer_rate = trap_policer_rate; +} +static inline void +devlink_trap_policer_set_req_set_trap_policer_burst(struct devlink_trap_policer_set_req *req, + __u64 trap_policer_burst) +{ + req->_present.trap_policer_burst = 1; + req->trap_policer_burst = trap_policer_burst; +} + +/* + * Get trap policer instances. 
+ */ +int devlink_trap_policer_set(struct ynl_sock *ys, + struct devlink_trap_policer_set_req *req); + +/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ +/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ +struct devlink_health_reporter_test_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 port_index:1; + __u32 health_reporter_name_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 port_index; + char *health_reporter_name; +}; + +static inline struct devlink_health_reporter_test_req * +devlink_health_reporter_test_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_health_reporter_test_req)); +} +void +devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req); + +static inline void +devlink_health_reporter_test_req_set_bus_name(struct devlink_health_reporter_test_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_health_reporter_test_req_set_dev_name(struct devlink_health_reporter_test_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_health_reporter_test_req_set_port_index(struct devlink_health_reporter_test_req *req, + __u32 port_index) +{ + req->_present.port_index = 1; + req->port_index = port_index; +} +static inline void +devlink_health_reporter_test_req_set_health_reporter_name(struct devlink_health_reporter_test_req *req, + const char *health_reporter_name) +{ + free(req->health_reporter_name); + req->_present.health_reporter_name_len = strlen(health_reporter_name); + req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); + memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); + req->health_reporter_name[req->_present.health_reporter_name_len] = 0; +} + +/* + * Test health reporter instances. 
+ */ +int devlink_health_reporter_test(struct ynl_sock *ys, + struct devlink_health_reporter_test_req *req); /* ============== DEVLINK_CMD_RATE_GET ============== */ /* DEVLINK_CMD_RATE_GET - do */ @@ -1797,6 +4677,270 @@ struct devlink_rate_get_list * devlink_rate_get_dump(struct ynl_sock *ys, struct devlink_rate_get_req_dump *req); +/* ============== DEVLINK_CMD_RATE_SET ============== */ +/* DEVLINK_CMD_RATE_SET - do */ +struct devlink_rate_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 rate_node_name_len; + __u32 rate_tx_share:1; + __u32 rate_tx_max:1; + __u32 rate_tx_priority:1; + __u32 rate_tx_weight:1; + __u32 rate_parent_node_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *rate_node_name; + __u64 rate_tx_share; + __u64 rate_tx_max; + __u32 rate_tx_priority; + __u32 rate_tx_weight; + char *rate_parent_node_name; +}; + +static inline struct devlink_rate_set_req *devlink_rate_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_rate_set_req)); +} +void devlink_rate_set_req_free(struct devlink_rate_set_req *req); + +static inline void +devlink_rate_set_req_set_bus_name(struct devlink_rate_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_rate_set_req_set_dev_name(struct devlink_rate_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_rate_set_req_set_rate_node_name(struct devlink_rate_set_req *req, + const char *rate_node_name) +{ + free(req->rate_node_name); + req->_present.rate_node_name_len = strlen(rate_node_name); + req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); + memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); + req->rate_node_name[req->_present.rate_node_name_len] = 0; +} +static inline void +devlink_rate_set_req_set_rate_tx_share(struct devlink_rate_set_req *req, + __u64 rate_tx_share) +{ + req->_present.rate_tx_share = 1; + req->rate_tx_share = rate_tx_share; +} +static inline void +devlink_rate_set_req_set_rate_tx_max(struct devlink_rate_set_req *req, + __u64 rate_tx_max) +{ + req->_present.rate_tx_max = 1; + req->rate_tx_max = rate_tx_max; +} +static inline void +devlink_rate_set_req_set_rate_tx_priority(struct devlink_rate_set_req *req, + __u32 rate_tx_priority) +{ + req->_present.rate_tx_priority = 1; + req->rate_tx_priority = rate_tx_priority; +} +static inline void +devlink_rate_set_req_set_rate_tx_weight(struct devlink_rate_set_req *req, + __u32 rate_tx_weight) +{ + req->_present.rate_tx_weight = 1; + req->rate_tx_weight = rate_tx_weight; +} +static inline void +devlink_rate_set_req_set_rate_parent_node_name(struct devlink_rate_set_req *req, + const char *rate_parent_node_name) +{ + free(req->rate_parent_node_name); + req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); + req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); + memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); + req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 
0; +} + +/* + * Set rate instances. + */ +int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req); + +/* ============== DEVLINK_CMD_RATE_NEW ============== */ +/* DEVLINK_CMD_RATE_NEW - do */ +struct devlink_rate_new_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 rate_node_name_len; + __u32 rate_tx_share:1; + __u32 rate_tx_max:1; + __u32 rate_tx_priority:1; + __u32 rate_tx_weight:1; + __u32 rate_parent_node_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *rate_node_name; + __u64 rate_tx_share; + __u64 rate_tx_max; + __u32 rate_tx_priority; + __u32 rate_tx_weight; + char *rate_parent_node_name; +}; + +static inline struct devlink_rate_new_req *devlink_rate_new_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_rate_new_req)); +} +void devlink_rate_new_req_free(struct devlink_rate_new_req *req); + +static inline void +devlink_rate_new_req_set_bus_name(struct devlink_rate_new_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_rate_new_req_set_dev_name(struct devlink_rate_new_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_rate_new_req_set_rate_node_name(struct devlink_rate_new_req *req, + const char *rate_node_name) +{ + free(req->rate_node_name); + req->_present.rate_node_name_len = strlen(rate_node_name); + req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); + memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); + req->rate_node_name[req->_present.rate_node_name_len] = 0; +} +static inline void +devlink_rate_new_req_set_rate_tx_share(struct devlink_rate_new_req *req, + __u64 rate_tx_share) +{ + req->_present.rate_tx_share = 1; + req->rate_tx_share = rate_tx_share; +} +static inline void +devlink_rate_new_req_set_rate_tx_max(struct devlink_rate_new_req *req, + __u64 rate_tx_max) +{ + req->_present.rate_tx_max = 1; + req->rate_tx_max = rate_tx_max; +} +static inline void +devlink_rate_new_req_set_rate_tx_priority(struct devlink_rate_new_req *req, + __u32 rate_tx_priority) +{ + req->_present.rate_tx_priority = 1; + req->rate_tx_priority = rate_tx_priority; +} +static inline void +devlink_rate_new_req_set_rate_tx_weight(struct devlink_rate_new_req *req, + __u32 rate_tx_weight) +{ + req->_present.rate_tx_weight = 1; + req->rate_tx_weight = rate_tx_weight; +} +static inline void +devlink_rate_new_req_set_rate_parent_node_name(struct devlink_rate_new_req *req, + const char *rate_parent_node_name) +{ + free(req->rate_parent_node_name); + req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); + req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); + memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); + req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 0; +} + +/* + * Create rate instances. 
+ */ +int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req); + +/* ============== DEVLINK_CMD_RATE_DEL ============== */ +/* DEVLINK_CMD_RATE_DEL - do */ +struct devlink_rate_del_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 rate_node_name_len; + } _present; + + char *bus_name; + char *dev_name; + char *rate_node_name; +}; + +static inline struct devlink_rate_del_req *devlink_rate_del_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_rate_del_req)); +} +void devlink_rate_del_req_free(struct devlink_rate_del_req *req); + +static inline void +devlink_rate_del_req_set_bus_name(struct devlink_rate_del_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_rate_del_req_set_dev_name(struct devlink_rate_del_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_rate_del_req_set_rate_node_name(struct devlink_rate_del_req *req, + const char *rate_node_name) +{ + free(req->rate_node_name); + req->_present.rate_node_name_len = strlen(rate_node_name); + req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); + memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); + req->rate_node_name[req->_present.rate_node_name_len] = 0; +} + +/* + * Delete rate instances. + */ +int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req); + /* ============== DEVLINK_CMD_LINECARD_GET ============== */ /* DEVLINK_CMD_LINECARD_GET - do */ struct devlink_linecard_get_req { @@ -1917,6 +5061,73 @@ struct devlink_linecard_get_list * devlink_linecard_get_dump(struct ynl_sock *ys, struct devlink_linecard_get_req_dump *req); +/* ============== DEVLINK_CMD_LINECARD_SET ============== */ +/* DEVLINK_CMD_LINECARD_SET - do */ +struct devlink_linecard_set_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 linecard_index:1; + __u32 linecard_type_len; + } _present; + + char *bus_name; + char *dev_name; + __u32 linecard_index; + char *linecard_type; +}; + +static inline struct devlink_linecard_set_req * +devlink_linecard_set_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_linecard_set_req)); +} +void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req); + +static inline void +devlink_linecard_set_req_set_bus_name(struct devlink_linecard_set_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_linecard_set_req_set_dev_name(struct devlink_linecard_set_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_linecard_set_req_set_linecard_index(struct devlink_linecard_set_req *req, + 
__u32 linecard_index) +{ + req->_present.linecard_index = 1; + req->linecard_index = linecard_index; +} +static inline void +devlink_linecard_set_req_set_linecard_type(struct devlink_linecard_set_req *req, + const char *linecard_type) +{ + free(req->linecard_type); + req->_present.linecard_type_len = strlen(linecard_type); + req->linecard_type = malloc(req->_present.linecard_type_len + 1); + memcpy(req->linecard_type, linecard_type, req->_present.linecard_type_len); + req->linecard_type[req->_present.linecard_type_len] = 0; +} + +/* + * Set line card instances. + */ +int devlink_linecard_set(struct ynl_sock *ys, + struct devlink_linecard_set_req *req); + /* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ /* DEVLINK_CMD_SELFTESTS_GET - do */ struct devlink_selftests_get_req { @@ -1987,4 +5198,58 @@ void devlink_selftests_get_list_free(struct devlink_selftests_get_list *rsp); struct devlink_selftests_get_list * devlink_selftests_get_dump(struct ynl_sock *ys); +/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ +/* DEVLINK_CMD_SELFTESTS_RUN - do */ +struct devlink_selftests_run_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 selftests:1; + } _present; + + char *bus_name; + char *dev_name; + struct devlink_dl_selftest_id selftests; +}; + +static inline struct devlink_selftests_run_req * +devlink_selftests_run_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_selftests_run_req)); +} +void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req); + +static inline void +devlink_selftests_run_req_set_bus_name(struct devlink_selftests_run_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_selftests_run_req_set_dev_name(struct devlink_selftests_run_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} +static inline void +devlink_selftests_run_req_set_selftests_flash(struct devlink_selftests_run_req *req) +{ + req->_present.selftests = 1; + req->selftests._present.flash = 1; +} + +/* + * Run device selftest instances. + */ +int devlink_selftests_run(struct ynl_sock *ys, + struct devlink_selftests_run_req *req); + #endif /* _LINUX_DEVLINK_GEN_H */ -- cgit v1.2.3 From 15c80e7a53d28aeb7354ef6d79d0ff55452e53f1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 21 Oct 2023 13:27:10 +0200 Subject: devlink: remove duplicated netlink callback prototypes The prototypes are now generated, remove the old ones. 
Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231021112711.660606-10-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/devl_internal.h | 62 --------------------------------------------- 1 file changed, 62 deletions(-) (limited to 'net') diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index daf4c696a618..183dbe3807ab 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -227,65 +227,3 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, /* Linecards */ unsigned int devlink_linecard_index(struct devlink_linecard *linecard); - -/* Devlink nl cmds */ -int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_port_param_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb); -int devlink_nl_port_param_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_port_param_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_region_read_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, - struct genl_info *info); -int 
devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From cebe7306073d4afeb24886f9063417e559fa2e22 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 21 Oct 2023 13:27:11 +0200 Subject: devlink: remove netlink small_ops All commands are now covered by generated split_ops. Remove the small_ops entirely alongside with unified devlink netlink policy array. Signed-off-by: Jiri Pirko Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231021112711.660606-11-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/netlink.c | 328 +------------------------------------------------- 1 file changed, 1 insertion(+), 327 deletions(-) (limited to 'net') diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index ca63e59a5e92..d0b90ebc8b15 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -13,75 +13,6 @@ static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; -static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_UNSPEC] = { .strict_start_type = - DEVLINK_ATTR_TRAP_POLICER_ID }, - [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO, - DEVLINK_PORT_TYPE_IB), - [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY, - DEVLINK_ESWITCH_MODE_SWITCHDEV), - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, - [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64}, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64}, - [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, - [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, - [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = 
NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = - NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS), - [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, - [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_ACTION_MAX), - [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), - [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, - [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, -}; - int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { @@ -191,7 +122,7 @@ unlock: int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { - return __devlink_nl_pre_doit(skb, info, ops->internal_flags); + return __devlink_nl_pre_doit(skb, info, 0); } int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, @@ -287,269 +218,12 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one); } -static const struct genl_small_ops devlink_nl_small_ops[40] = { - { - .cmd = DEVLINK_CMD_PORT_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_port_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_RATE_SET, - .doit = devlink_nl_rate_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RATE_NEW, - .doit = devlink_nl_rate_new_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RATE_DEL, - .doit = devlink_nl_rate_del_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PORT_SPLIT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_port_split_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_PORT_UNSPLIT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_port_unsplit_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_PORT_NEW, - .doit = devlink_nl_port_new_doit, - .flags = GENL_ADMIN_PERM, - }, - 
{ - .cmd = DEVLINK_CMD_PORT_DEL, - .doit = devlink_nl_port_del_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - - { - .cmd = DEVLINK_CMD_LINECARD_SET, - .doit = devlink_nl_linecard_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_POOL_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_sb_pool_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_sb_port_pool_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_sb_tc_pool_bind_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_sb_occ_snapshot_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_sb_occ_max_clear_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_ESWITCH_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_eswitch_get_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_ESWITCH_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_eswitch_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_dpipe_table_get_doit, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_dpipe_entries_get_doit, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_dpipe_headers_get_doit, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_dpipe_table_counters_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RESOURCE_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_resource_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RESOURCE_DUMP, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_resource_dump_doit, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_RELOAD, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_reload_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PARAM_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_param_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PORT_PARAM_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_port_param_get_doit, - .dumpit = devlink_nl_port_param_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_PORT_PARAM_SET, - .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_port_param_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_REGION_NEW, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_region_new_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_REGION_DEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_region_del_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_REGION_READ, - .validate = GENL_DONT_VALIDATE_STRICT | - GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_region_read_dumpit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_health_reporter_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_health_reporter_recover_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_health_reporter_diagnose_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - .validate = GENL_DONT_VALIDATE_STRICT | - GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_health_reporter_dump_get_dumpit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_health_reporter_dump_clear_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_health_reporter_test_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_FLASH_UPDATE, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_flash_update_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_SET, - .doit = devlink_nl_trap_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_GROUP_SET, - .doit = devlink_nl_trap_group_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_POLICER_SET, - .doit = devlink_nl_trap_policer_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SELFTESTS_RUN, - .doit = devlink_nl_selftests_run_doit, - .flags = GENL_ADMIN_PERM, - }, - /* -- No new ops here! Use split ops going forward! 
-- */ -}; - struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .policy = devlink_nl_policy, .netnsok = true, .parallel_ops = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, .module = THIS_MODULE, - .small_ops = devlink_nl_small_ops, - .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops), .split_ops = devlink_nl_ops, .n_split_ops = ARRAY_SIZE(devlink_nl_ops), .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, -- cgit v1.2.3 From 58d53d8f7da63dd13903bec0a40b3009a841b61b Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 20 Oct 2023 17:59:48 +0800 Subject: page_pool: unify frag_count handling in page_pool_is_last_frag() Currently, when page_pool_create() is called with the PP_FLAG_PAGE_FRAG flag, page_pool_alloc_pages() is only allowed to be called under the below constraints: 1. page_pool_fragment_page() needs to be called to set up page->pp_frag_count immediately. 2. page_pool_defrag_page() often needs to be called to drain page->pp_frag_count when no user is holding on to that page any more. Those constraints exist in order to support splitting a page into multiple fragments, and they have some overhead because of the cache line dirtying/bouncing and atomic updates. Those constraints are unavoidable when a page needs to be split into more than one fragment, but there is also the case where we want to avoid the constraints and their overhead because a page can't be split as it only holds the single fragment requested by the user. Which situation applies depends on the use case: use case 1: allocate page without page splitting. use case 2: allocate page with page splitting. use case 3: allocate page with or without page splitting depending on the fragment size. Currently the page pool only provides the page_pool_alloc_pages() and page_pool_alloc_frag() APIs to enable cases 1 & 2 separately, so we cannot use a combination of 1 & 2 to enable case 3; that is not possible yet because of the per-page_pool flag PP_FLAG_PAGE_FRAG. So in order to allow allocating unsplit pages without the overhead of split pages, while still allowing split pages, we need to remove the per-page_pool flag in page_pool_is_last_frag(). As best as I can tell, there are two methods to do this: 1. Add a per-page flag/bit to indicate whether a page is split or not, which means we might need to update that flag/bit every time the page is recycled, dirtying the cache line of 'struct page' for use case 1. 2. Unify the page->pp_frag_count handling for both split and unsplit pages by assuming all pages in the page pool are initially split into one big fragment. As the page pool already supports use case 1 without dirtying the cache line of 'struct page' whenever a page is recycled, we need to support the above use case 3 with minimal overhead, especially without adding any noticeable overhead for use case 1. Since we are already doing an optimization by not updating pp_frag_count in page_pool_defrag_page() for the last fragment user, this patch chooses to unify the pp_frag_count handling to support the above use case 3. A micro-benchmark in [1] shows no noticeable performance degradation with this patch applied, and provides some justification for unifying the frag_count handling. 1. 
https://lore.kernel.org/all/bf2591f8-7b3c-4480-bb2c-31dc9da1d6ac@huawei.com/ Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck CC: Liang Chen CC: Alexander Lobakin Link: https://lore.kernel.org/r/20231020095952.11055-2-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 47 +++++++++++++++++++++++++++++------------ net/core/page_pool.c | 10 ++++++++- 2 files changed, 43 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 8f64adf86f5b..759489c037c7 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -115,28 +115,49 @@ static inline long page_pool_defrag_page(struct page *page, long nr) long ret; /* If nr == pp_frag_count then we have cleared all remaining - * references to the page. No need to actually overwrite it, instead - * we can leave this to be overwritten by the calling function. + * references to the page: + * 1. 'n == 1': no need to actually overwrite it. + * 2. 'n != 1': overwrite it with one, which is the rare case + * for pp_frag_count draining. * - * The main advantage to doing this is that an atomic_read is - * generally a much cheaper operation than an atomic update, - * especially when dealing with a page that may be partitioned - * into only 2 or 3 pieces. + * The main advantage to doing this is that not only we avoid a atomic + * update, as an atomic_read is generally a much cheaper operation than + * an atomic update, especially when dealing with a page that may be + * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count + * handling by ensuring all pages have partitioned into only 1 piece + * initially, and only overwrite it when the page is partitioned into + * more than one piece. */ - if (atomic_long_read(&page->pp_frag_count) == nr) + if (atomic_long_read(&page->pp_frag_count) == nr) { + /* As we have ensured nr is always one for constant case using + * the BUILD_BUG_ON(), only need to handle the non-constant case + * here for pp_frag_count draining, which is a rare case. + */ + BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); + if (!__builtin_constant_p(nr)) + atomic_long_set(&page->pp_frag_count, 1); + return 0; + } ret = atomic_long_sub_return(nr, &page->pp_frag_count); WARN_ON(ret < 0); + + /* We are the last user here too, reset pp_frag_count back to 1 to + * ensure all pages have been partitioned into 1 piece initially, + * this should be the rare case when the last two fragment users call + * page_pool_defrag_page() currently. + */ + if (unlikely(!ret)) + atomic_long_set(&page->pp_frag_count, 1); + return ret; } -static inline bool page_pool_is_last_frag(struct page_pool *pool, - struct page *page) +static inline bool page_pool_is_last_frag(struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || - (page_pool_defrag_page(page, 1) == 0); + /* If page_pool_defrag_page() returns 0, we were the last user */ + return page_pool_defrag_page(page, 1) == 0; } /** @@ -161,7 +182,7 @@ static inline void page_pool_put_page(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. 
*/ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_frag(pool, page)) + if (!page_pool_is_last_frag(page)) return; page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8a9868ea5067..953535cab081 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -376,6 +376,14 @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; + + /* Ensuring all pages have been split into one fragment initially: + * page_pool_set_pp_info() is only called once for every page when it + * is allocated from the page allocator and page_pool_fragment_page() + * is dirtying the same cache line as the page->pp_magic above, so + * the overhead is negligible. + */ + page_pool_fragment_page(page, 1); if (pool->p.init_callback) pool->p.init_callback(page, pool->p.init_arg); } @@ -672,7 +680,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, struct page *page = virt_to_head_page(data[i]); /* It is not the last user for the page frag case */ - if (!page_pool_is_last_frag(pool, page)) + if (!page_pool_is_last_frag(page)) continue; page = __page_pool_put_page(pool, page, -1, false); -- cgit v1.2.3 From 09d96ee5674a0eaa800c664353756ecc45c4a87f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 20 Oct 2023 17:59:49 +0800 Subject: page_pool: remove PP_FLAG_PAGE_FRAG PP_FLAG_PAGE_FRAG is not really needed after pp_frag_count handling is unified and page_pool_alloc_frag() is supported in 32-bit arch with 64-bit DMA, so remove it. Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck CC: Liang Chen CC: Alexander Lobakin Link: https://lore.kernel.org/r/20231020095952.11055-3-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 3 --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- include/net/page_pool/types.h | 6 ++---- net/core/page_pool.c | 3 +-- net/core/skbuff.c | 2 +- 9 files changed, 8 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d7a29f99401..d0359b569afe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3302,8 +3302,6 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pp.dma_dir = bp->rx_dir; pp.max_len = PAGE_SIZE; pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; - if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) - pp.flags |= PP_FLAG_PAGE_FRAG; rxr->page_pool = page_pool_create(&pp); if (IS_ERR(rxr->page_pool)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cf50368441b7..06117502001f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4940,8 +4940,7 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv) static void hns3_alloc_page_pool(struct hns3_enet_ring *ring) { struct page_pool_params pp_params = { - .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG | - PP_FLAG_DMA_SYNC_DEV, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = hns3_page_order(ring), .pool_size = ring->desc_num * hns3_buf_size(ring) / (PAGE_SIZE << hns3_page_order(ring)), diff --git 
a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 58c5412d3173..5e1ef70d54fe 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -595,9 +595,6 @@ static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) .offset = 0, }; - if (rxbufq->rx_buf_size == IDPF_RX_BUF_2048) - pp.flags |= PP_FLAG_PAGE_FRAG; - return page_pool_create(&pp); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 818ce76185b2..1a42bfded872 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1404,7 +1404,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, } pp_params.order = get_order(buf_size); - pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; + pp_params.flags = PP_FLAG_DMA_MAP; pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); pp_params.nid = NUMA_NO_NODE; pp_params.dev = pfvf->dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9325b8f00af0..ea58c6917433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -897,7 +897,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, struct page_pool_params pp_params = { 0 }; pp_params.order = 0; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; pp_params.pool_size = pool_size; pp_params.nid = node; pp_params.dev = rq->pdev; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index cb76053973aa..51a767121b0d 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -570,7 +570,7 @@ int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q) { struct page_pool_params pp_params = { .order = 0, - .flags = PP_FLAG_PAGE_FRAG, + .flags = 0, .nid = NUMA_NO_NODE, .dev = dev->dma_dev, }; diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 887e7946a597..6fc5134095ed 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -17,10 +17,8 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV |\ - PP_FLAG_PAGE_FRAG) + PP_FLAG_DMA_SYNC_DEV) /* * Fast allocation side cache array/stack @@ -45,7 +43,7 @@ struct pp_alloc_cache { /** * struct page_pool_params - page pool parameters - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV * @order: 2^order pages on allocation * @pool_size: size of the ptr_ring * @nid: NUMA node id to allocate from pages from diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 953535cab081..2a3671c97ca7 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -756,8 +756,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int max_size = PAGE_SIZE << pool->p.order; struct page *page = pool->frag_page; - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || - size > max_size)) + if (WARN_ON(size > max_size)) return NULL; size = ALIGN(size, dma_get_cache_alignment()); diff --git a/net/core/skbuff.c b/net/core/skbuff.c 
index 975c9a6ffb4a..c52ddd6891d9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5765,7 +5765,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* In general, avoid mixing page_pool and non-page_pool allocated * pages within the same SKB. Additionally avoid dealing with clones * with page_pool pages, in case the SKB is using page_pool fragment - * references (PP_FLAG_PAGE_FRAG). Since we only take full page + * references (page_pool_alloc_frag()). Since we only take full page * references for cloned SKBs at the moment that would result in * inconsistent reference counts. * In theory we could take full references if @from is cloned and -- cgit v1.2.3 From de97502e16fc406a74edee8359612e518986cf59 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 20 Oct 2023 17:59:50 +0800 Subject: page_pool: introduce page_pool_alloc() API Currently the page pool supports the below use cases: use case 1: allocate a page without page splitting using the page_pool_alloc_pages() API, if the driver knows that the memory it needs is always bigger than half of the page allocated from the page pool. use case 2: allocate a page frag with page splitting using the page_pool_alloc_frag() API, if the driver knows that the memory it needs is always smaller than or equal to half of the page allocated from the page pool. There are emerging use cases [1] & [2] that are a mix of the above two: the driver doesn't know the size of the memory it needs beforehand, so the driver may use something like below to allocate memory with the least memory utilization and performance penalty: if (size << 1 > max_size) page = page_pool_alloc_pages(); else page = page_pool_alloc_frag(); To avoid the driver doing something like the above, add the page_pool_alloc() API to support this use case, and report the true size of the memory that is actually allocated by updating '*size' back to the driver, in order to avoid exacerbating the truesize underestimation problem. Rename page_pool_free(), which is used in the destroy process, to __page_pool_destroy() to avoid confusion with the newly added API. 1. https://lore.kernel.org/all/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/ 2. 
https://lore.kernel.org/all/20230526054621.18371-3-liangchen.linux@gmail.com/ Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck CC: Liang Chen CC: Alexander Lobakin Link: https://lore.kernel.org/r/20231020095952.11055-4-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 66 +++++++++++++++++++++++++++++++++++++++++ net/core/page_pool.c | 4 +-- 2 files changed, 68 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 759489c037c7..1b76e05dc4d2 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -82,6 +82,66 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); } +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page; + + if ((*size << 1) > max_size) { + *size = max_size; + *offset = 0; + return page_pool_alloc_pages(pool, gfp); + } + + page = page_pool_alloc_frag(pool, offset, *size, gfp); + if (unlikely(!page)) + return NULL; + + /* There is very likely not enough space for another fragment, so append + * the remaining size to the current fragment to avoid truesize + * underestimate problem. + */ + if (pool->frag_offset + *size > max_size) { + *size = max_size - *offset; + pool->frag_offset = max_size; + } + + return page; +} + +static inline struct page *page_pool_dev_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc(pool, offset, size, gfp); +} + +static inline void *page_pool_alloc_va(struct page_pool *pool, + unsigned int *size, gfp_t gfp) +{ + unsigned int offset; + struct page *page; + + /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */ + page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM); + if (unlikely(!page)) + return NULL; + + return page_address(page) + offset; +} + +static inline void *page_pool_dev_alloc_va(struct page_pool *pool, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_va(pool, size, gfp); +} + /** * page_pool_get_dma_dir() - Retrieve the stored DMA direction. * @pool: pool from which page was allocated @@ -221,6 +281,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) +static inline void page_pool_free_va(struct page_pool *pool, void *va, + bool allow_direct) +{ + page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); +} + /** * page_pool_get_dma_addr() - Retrieve the stored DMA address. 
* @page: page allocated from a page pool diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 2a3671c97ca7..5e409b98aba0 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -809,7 +809,7 @@ static void page_pool_empty_ring(struct page_pool *pool) } } -static void page_pool_free(struct page_pool *pool) +static void __page_pool_destroy(struct page_pool *pool) { if (pool->disconnect) pool->disconnect(pool); @@ -860,7 +860,7 @@ static int page_pool_release(struct page_pool *pool) page_pool_scrub(pool); inflight = page_pool_inflight(pool); if (!inflight) - page_pool_free(pool); + __page_pool_destroy(pool); return inflight; } -- cgit v1.2.3 From 2def8ff3fdb66d10ebe3ec84787799ac0244eb23 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Thu, 19 Oct 2023 20:00:24 +0800 Subject: sock: Code cleanup on __sk_mem_raise_allocated() Code cleanup for both better simplicity and readability. No functional change intended. Signed-off-by: Abel Wu Acked-by: Shakeel Butt Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231019120026.42215-1-wuyun.abel@bytedance.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 290165954379..43842520db86 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3039,17 +3039,19 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; + struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; - bool charged = true; + bool charged = false; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); - if (memcg_charge && - !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, - gfp_memcg_charge()))) - goto suppress_allocation; + + if (memcg) { + if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge())) + goto suppress_allocation; + charged = true; + } /* Under limit. */ if (allocated <= sk_prot_mem_limits(sk, 0)) { @@ -3104,8 +3106,8 @@ suppress_allocation: */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ - if (memcg_charge && !charged) { - mem_cgroup_charge_skmem(sk->sk_memcg, amt, + if (memcg && !charged) { + mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; @@ -3117,8 +3119,8 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (memcg_charge && charged) - mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); + if (charged) + mem_cgroup_uncharge_skmem(memcg, amt); return 0; } -- cgit v1.2.3 From 2e12072c67b5f65fc71a569985a1262531fbdc06 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Thu, 19 Oct 2023 20:00:25 +0800 Subject: sock: Doc behaviors for pressure heurisitics There are now two accounting infrastructures for skmem, while the heuristics in __sk_mem_raise_allocated() were actually introduced before memcg was born. Add some comments to clarify whether they can be applied to both infrastructures or not. 
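For illustration only (this snippet is not part of the diff below), the guarantee being documented reduces to roughly the following shape inside __sk_mem_raise_allocated(), using the existing sk_prot_mem_limits() and sk_get_rmem0() helpers; the hard-limit check runs first, so the minimum-buffer rule cannot be used to bypass it:

	/* sketch only: the hard limit is enforced before anything else */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* even under (global or memcg) pressure, a socket still below its
	 * minimum receive buffer may grow, so RFC 7323 features keep
	 * working; a similar check covers the send side.
	 */
	if (kind == SK_MEM_RECV &&
	    atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
		return 1;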
Suggested-by: Shakeel Butt Signed-off-by: Abel Wu Acked-by: Shakeel Butt Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231019120026.42215-2-wuyun.abel@bytedance.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 43842520db86..9f969e3c2ddf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3067,7 +3067,14 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; - /* guarantee minimum buffer size under pressure */ + /* Guarantee minimum buffer size under pressure (either global + * or memcg) to make sure features described in RFC 7323 (TCP + * Extensions for High Performance) work properly. + * + * This rule does NOT stand when exceeds global or memcg's hard + * limit, or else a DoS attack can be taken place by spawning + * lots of sockets whose usage are under minimum buffer size. + */ if (kind == SK_MEM_RECV) { if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; @@ -3088,6 +3095,11 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (!sk_under_memory_pressure(sk)) return 1; + + /* Try to be fair among all the sockets under global + * pressure by allowing the ones that below average + * usage to raise. + */ alloc = sk_sockets_allocated_read_positive(sk); if (sk_prot_mem_limits(sk, 2) > alloc * sk_mem_pages(sk->sk_wmem_queued + -- cgit v1.2.3 From 66e6369e312d161708786123fb44ecd53ff32d82 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Thu, 19 Oct 2023 20:00:26 +0800 Subject: sock: Ignore memcg pressure heuristics when raising allocated Before sockets became aware of net-memcg's memory pressure since commit e1aab161e013 ("socket: initial cgroup code."), the memory usage would be granted to raise if below average even when under protocol's pressure. This provides fairness among the sockets of same protocol. That commit changes this because the heuristic will also be effective when only memcg is under pressure which makes no sense. So revert that behavior. After reverting, __sk_mem_raise_allocated() no longer considers memcg's pressure. As memcgs are isolated from each other w.r.t. memory accounting, consuming one's budget won't affect others. So except the places where buffer sizes are needed to be tuned, allow workloads to use the memory they are provisioned. Signed-off-by: Abel Wu Acked-by: Shakeel Butt Acked-by: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231019120026.42215-3-wuyun.abel@bytedance.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 9f969e3c2ddf..1d28e3e87970 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3035,7 +3035,13 @@ EXPORT_SYMBOL(sk_wait_data); * @amt: pages to allocate * @kind: allocation type * - * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc + * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc. + * + * Unlike the globally shared limits among the sockets under same protocol, + * consuming the budget of a memcg won't have direct effect on other ones. + * So be optimistic about memcg's tolerance, and leave the callers to decide + * whether or not to raise allocated through sk_under_memory_pressure() or + * its variants. 
*/ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { @@ -3093,7 +3099,11 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk_has_memory_pressure(sk)) { u64 alloc; - if (!sk_under_memory_pressure(sk)) + /* The following 'average' heuristic is within the + * scope of global accounting, so it only makes + * sense for global memory pressure. + */ + if (!sk_under_global_memory_pressure(sk)) return 1; /* Try to be fair among all the sockets under global -- cgit v1.2.3 From 99b29a499b5fdfb7ab274835b8e4d4c11df2f6d7 Mon Sep 17 00:00:00 2001 From: Albert Huang Date: Mon, 23 Oct 2023 20:57:31 +0800 Subject: xsk: Avoid starving the xsk further down the list In the previous implementation, when multiple xsk sockets were associated with a single xsk_buff_pool, a situation could arise where the xsk_tx_list maintained data at the front for one xsk socket while starving the xsk sockets at the back of the list. This could result in issues such as the inability to transmit packets, increased latency, and jitter. To address this problem, we introduce a new variable called tx_budget_spent, which limits each xsk to transmit a maximum of MAX_PER_SOCKET_BUDGET tx descriptors. This allocation ensures equitable opportunities for subsequent xsk sockets to send tx descriptors. The value of MAX_PER_SOCKET_BUDGET is set to 32. Signed-off-by: Albert Huang Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20231023125732.82261-1-huangjie.albert@bytedance.com --- include/net/xdp_sock.h | 7 +++++++ net/xdp/xsk.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'net') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7dd0df2f6f8e..f83128007fb0 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -63,6 +63,13 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; + /* record the number of tx descriptors sent by this xsk and + * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs + * to be given to other xsks for sending tx descriptors, thereby + * preventing other XSKs from being starved. + */ + u32 tx_budget_spent; + /* Protects generic receive. */ spinlock_t rx_lock; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ba070fd37d24..ae9f8cb611f6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -33,6 +33,7 @@ #include "xsk.h" #define TX_BATCH_SIZE 32 +#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); @@ -423,16 +424,25 @@ EXPORT_SYMBOL(xsk_tx_release); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { + bool budget_exhausted = false; struct xdp_sock *xs; rcu_read_lock(); +again: list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { + if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) { + budget_exhausted = true; + continue; + } + if (!xskq_cons_peek_desc(xs->tx, desc, pool)) { if (xskq_has_descs(xs->tx)) xskq_cons_release(xs->tx); continue; } + xs->tx_budget_spent++; + /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed * if there is space in it. 
This avoids having to implement @@ -446,6 +456,14 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) return true; } + if (budget_exhausted) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) + xs->tx_budget_spent = 0; + + budget_exhausted = false; + goto again; + } + out: rcu_read_unlock(); return false; -- cgit v1.2.3 From 6d25d1dc76bf5943a5c1f4bb74d66d5eac58eb77 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 23 Oct 2023 14:47:29 +0800 Subject: net: sched: sch_qfq: Use non-work-conserving warning handler A helper function for printing non-work-conserving alarms was added in commit b00355db3f88 ("pkt_sched: sch_hfsc: sch_htb: Add non-work-conserving warning handler."). In this commit, use qdisc_warn_nonwc() instead of WARN_ONCE() to handle the non-work-conserving warning in the qfq Qdisc. Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20231023064729.370649-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- net/sched/sch_qfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 546c10adcacd..5598f8be18ae 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1003,7 +1003,7 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, *cl = list_first_entry(&agg->active, struct qfq_class, alist); skb = (*cl)->qdisc->ops->peek((*cl)->qdisc); if (skb == NULL) - WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc); else *len = qdisc_pkt_len(skb); -- cgit v1.2.3 From 8079fc30f79799e59d9602e7e080d434936a482d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Oct 2023 14:18:15 +0200 Subject: netfilter: nft_set_rbtree: rename gc deactivate+erase function The next patch adds a caller that doesn't hold priv->lock for writing and will need a similar function. Rename the existing function to make it clear that it can only be used for opportunistic gc during insertion. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a71e..d59be2bc6e6c 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -221,14 +221,15 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, return rbe; } -static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, - struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe) +static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) { struct nft_set_elem elem = { .priv = rbe, }; + lockdep_assert_held_write(&priv->lock); nft_setelem_data_deactivate(net, set, &elem); rb_erase(&rbe->node, &priv->root); } @@ -263,7 +264,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, rbe_prev = NULL; if (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); - nft_rbtree_gc_remove(net, set, priv, rbe_prev); + nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev); /* There is always room in this trans gc for this element, * memory allocation never actually happens, hence, the warning @@ -277,7 +278,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, nft_trans_gc_elem_add(gc, rbe_prev); } - nft_rbtree_gc_remove(net, set, priv, rbe); + nft_rbtree_gc_elem_remove(net, set, priv, rbe); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 7d259f021aaa78904b6c836d975e8e00d83a182a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Oct 2023 14:18:16 +0200 Subject: netfilter: nft_set_rbtree: prefer sync gc to async worker There is no need for asynchronous garbage collection, rbtree inserts can only happen from the netlink control plane. We already perform on-demand gc on insertion, in the area of the tree where the insertion takes place, but we don't do a full tree walk there for performance reasons. Do a full gc walk at the end of the transaction instead and remove the async worker. 
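In practice the gc trigger now lives in the set's ->commit callback; a condensed sketch of that mechanism (the full version is in the diff below), with the interval check keeping full tree walks rate-limited:

	static void nft_rbtree_commit(struct nft_set *set)
	{
		struct nft_rbtree *priv = nft_set_priv(set);

		/* only do a full gc walk once per gc interval */
		if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
			nft_rbtree_gc(set);
	}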
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 124 +++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d59be2bc6e6c..7d1004f9e7d2 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -19,7 +19,7 @@ struct nft_rbtree { struct rb_root root; rwlock_t lock; seqcount_rwlock_t count; - struct delayed_work gc_work; + unsigned long last_gc; }; struct nft_rbtree_elem { @@ -48,8 +48,7 @@ static int nft_rbtree_cmp(const struct nft_set *set, static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) { - return nft_set_elem_expired(&rbe->ext) || - nft_set_elem_is_dead(&rbe->ext); + return nft_set_elem_expired(&rbe->ext); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -508,6 +507,15 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } +static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) +{ + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + rb_erase(&rbe->node, &priv->root); + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); +} + static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -515,11 +523,7 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->priv; - write_lock_bh(&priv->lock); - write_seqcount_begin(&priv->count); - rb_erase(&rbe->node, &priv->root); - write_seqcount_end(&priv->count); - write_unlock_bh(&priv->lock); + nft_rbtree_erase(priv, rbe); } static void nft_rbtree_activate(const struct net *net, @@ -613,45 +617,40 @@ cont: read_unlock_bh(&priv->lock); } -static void nft_rbtree_gc(struct work_struct *work) +static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) { + struct nft_set_elem elem = { + .priv = rbe, + }; + + nft_setelem_data_deactivate(net, set, &elem); + nft_rbtree_erase(priv, rbe); +} + +static void nft_rbtree_gc(struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; struct nftables_pernet *nft_net; - struct nft_rbtree *priv; + struct rb_node *node, *next; struct nft_trans_gc *gc; - struct rb_node *node; - struct nft_set *set; - unsigned int gc_seq; struct net *net; - priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); net = read_pnet(&set->net); nft_net = nft_pernet(net); - gc_seq = READ_ONCE(nft_net->gc_seq); - if (nft_set_gc_is_pending(set)) - goto done; - - gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (!gc) - goto done; - - read_lock_bh(&priv->lock); - for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + return; - /* Ruleset has been updated, try later. 
*/ - if (READ_ONCE(nft_net->gc_seq) != gc_seq) { - nft_trans_gc_destroy(gc); - gc = NULL; - goto try_later; - } + for (node = rb_first(&priv->root); node ; node = next) { + next = rb_next(node); rbe = rb_entry(node, struct nft_rbtree_elem, node); - if (nft_set_elem_is_dead(&rbe->ext)) - goto dead_elem; - /* elements are reversed in the rbtree for historical reasons, * from highest to lowest value, that is why end element is * always visited before the start element. @@ -663,37 +662,34 @@ static void nft_rbtree_gc(struct work_struct *work) if (!nft_set_elem_expired(&rbe->ext)) continue; - nft_set_elem_dead(&rbe->ext); - - if (!rbe_end) - continue; - - nft_set_elem_dead(&rbe_end->ext); - - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; - nft_trans_gc_elem_add(gc, rbe_end); - rbe_end = NULL; -dead_elem: - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + /* end element needs to be removed first, it has + * no timeout extension. + */ + if (rbe_end) { + nft_rbtree_gc_remove(net, set, priv, rbe_end); + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; + } + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; + nft_rbtree_gc_remove(net, set, priv, rbe); nft_trans_gc_elem_add(gc, rbe); } - gc = nft_trans_gc_catchall_async(gc, gc_seq); - try_later: - read_unlock_bh(&priv->lock); - if (gc) - nft_trans_gc_queue_async_done(gc); -done: - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); + if (gc) { + gc = nft_trans_gc_catchall_sync(gc); + nft_trans_gc_queue_sync_done(gc); + priv->last_gc = jiffies; + } } static u64 nft_rbtree_privsize(const struct nlattr * const nla[], @@ -712,11 +708,6 @@ static int nft_rbtree_init(const struct nft_set *set, seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc); - if (set->flags & NFT_SET_TIMEOUT) - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); - return 0; } @@ -727,8 +718,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct rb_node *node; - cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); @@ -754,6 +743,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static void nft_rbtree_commit(struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) + nft_rbtree_gc(set); +} + +static void nft_rbtree_gc_init(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->last_gc = jiffies; +} + const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -767,6 +771,8 @@ const struct nft_set_type nft_set_rbtree_type = { .deactivate = nft_rbtree_deactivate, .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, + .commit = nft_rbtree_commit, + .gc_init = nft_rbtree_gc_init, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .get = nft_rbtree_get, -- cgit v1.2.3 From 8877393029e764036892d39614900987cbd21ca6 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 19 Oct 2023 16:03:34 +0200 Subject: netfilter: nf_tables: Open-code audit log call in nf_tables_getrule() The table lookup will be 
dropped from that function, so remove that dependency from audit logging code. Using whatever is in nla[NFTA_RULE_TABLE] is sufficient as long as the previous rule info filling succeded. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 38f9b224098e..ce3bb38262c4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3589,15 +3589,18 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { + struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; + u32 portid = NETLINK_CB(skb).portid; const struct nft_chain *chain; const struct nft_rule *rule; struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; bool reset = false; + char *buf; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { @@ -3637,16 +3640,24 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) reset = true; - err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, + err = nf_tables_fill_rule_info(skb2, net, portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, 0, reset); if (err < 0) goto err_fill_rule_info; - if (reset) - audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1); + if (!reset) + return nfnetlink_unicast(skb2, net, portid); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_RULE_TABLE]), + (char *)nla_data(nla[NFTA_RULE_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); err_fill_rule_info: kfree_skb(skb2); -- cgit v1.2.3 From 1578c32877191815f631af32ba5dfc1f1b20c1b4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 19 Oct 2023 16:03:35 +0200 Subject: netfilter: nf_tables: Introduce nf_tables_getrule_single() Outsource the reply skb preparation for non-dump getrule requests into a distinct function. Prep work for rule reset locking. 
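Returning a ready-made sk_buff (or an ERR_PTR) lets a later caller serialize around the lookup-and-fill step without duplicating it; a rough sketch of the intended call pattern, taken from the follow-up locking patch:

	/* reset path (added by the next patch): serialize concurrent resets */
	mutex_lock(&nft_net->commit_mutex);
	skb2 = nf_tables_getrule_single(portid, info, nla, true);
	mutex_unlock(&nft_net->commit_mutex);
	if (IS_ERR(skb2))
		return PTR_ERR(skb2);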
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 74 +++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ce3bb38262c4..d39990c3ae1d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3586,65 +3586,81 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { - struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; - u32 portid = NETLINK_CB(skb).portid; const struct nft_chain *chain; const struct nft_rule *rule; struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; - bool reset = false; - char *buf; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start= nf_tables_dump_rules_start, - .dump = nf_tables_dump_rules, - .done = nf_tables_dump_rules_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); - return PTR_ERR(chain); + return ERR_CAST(chain); } rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); - return PTR_ERR(rule); + return ERR_CAST(rule); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; - - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; + return ERR_PTR(-ENOMEM); err = nf_tables_fill_rule_info(skb2, net, portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, 0, reset); - if (err < 0) - goto err_fill_rule_info; + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } + + return skb2; +} + +static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + bool reset = false; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start= nf_tables_dump_rules_start, + .dump = nf_tables_dump_rules, + .done = nf_tables_dump_rules_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + reset = true; + + skb2 = nf_tables_getrule_single(portid, info, nla, reset); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); if (!reset) return nfnetlink_unicast(skb2, net, portid); @@ -3658,10 +3674,6 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, kfree(buf); return nfnetlink_unicast(skb2, net, portid); - 
-err_fill_rule_info: - kfree_skb(skb2); - return err; } void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) -- cgit v1.2.3 From 3cb03edb4de33fd04c4ea55f47397b96a8657c53 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 19 Oct 2023 16:03:36 +0200 Subject: netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Rule reset is not concurrency-safe per-se, so multiple CPUs may reset the same rule at the same time. At least counter and quota expressions will suffer from value underruns in this case. Prevent this by introducing dedicated locking callbacks for nfnetlink and the asynchronous dump handling to serialize access. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 77 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d39990c3ae1d..03a306d15f43 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3551,6 +3551,23 @@ done: return skb->len; } +static int nf_tables_dumpreset_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + /* Mutex is held is to prevent that two concurrent dump-and-reset calls + * do not underrun counters and quotas. The commit_mutex is used for + * the lack a better lock, this is not transaction path. + */ + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_rules(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_rules_start(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; @@ -3570,12 +3587,18 @@ static int nf_tables_dump_rules_start(struct netlink_callback *cb) return -ENOMEM; } } - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) +{ + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_rules_start(cb); +} + static int nf_tables_dump_rules_done(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; @@ -3636,12 +3659,9 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - struct nftables_pernet *nft_net = nft_pernet(info->net); u32 portid = NETLINK_CB(skb).portid; struct net *net = info->net; struct sk_buff *skb2; - bool reset = false; - char *buf; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -3655,15 +3675,46 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; - - skb2 = nf_tables_getrule_single(portid, info, nla, reset); + skb2 = nf_tables_getrule_single(portid, info, nla, false); if (IS_ERR(skb2)) return PTR_ERR(skb2); - if (!reset) - return nfnetlink_unicast(skb2, net, portid); + return nfnetlink_unicast(skb2, net, portid); +} + +static int nf_tables_getrule_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff 
*skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start= nf_tables_dumpreset_rules_start, + .dump = nf_tables_dumpreset_rules, + .done = nf_tables_dump_rules_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getrule_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_RULE_TABLE]), @@ -8995,7 +9046,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_GETRULE_RESET] = { - .call = nf_tables_getrule, + .call = nf_tables_getrule_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, -- cgit v1.2.3 From ee6f05dcd6727669b6f49a8a6dafad94a40ee872 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 Oct 2023 13:14:25 +0200 Subject: br_netfilter: use single forward hook for ip and arp br_netfilter registers two forward hooks, one for ip and one for arp. Just use a common function for both and then call the arp/ip helper as needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 72 +++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4c0c9f838f5c..6adcb45bca75 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -570,18 +570,12 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff } -/* This is the 'purely bridged' case. For IP, we pass the packet to - * netfilter with indev and outdev set to the bridge device, - * but we are still able to filter on the 'real' indev/outdev - * because of the physdev module. For ARP, indev and outdev are the - * bridge ports. 
*/ -static unsigned int br_nf_forward_ip(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +static unsigned int br_nf_forward_ip(struct sk_buff *skb, + const struct nf_hook_state *state, + u8 pf) { struct nf_bridge_info *nf_bridge; struct net_device *parent; - u_int8_t pf; nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) @@ -600,15 +594,6 @@ static unsigned int br_nf_forward_ip(void *priv, if (!parent) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); - if (IS_IP(skb) || is_vlan_ip(skb, state->net) || - is_pppoe_ip(skb, state->net)) - pf = NFPROTO_IPV4; - else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || - is_pppoe_ipv6(skb, state->net)) - pf = NFPROTO_IPV6; - else - return NF_ACCEPT; - nf_bridge_pull_encap_header(skb); if (skb->pkt_type == PACKET_OTHERHOST) { @@ -620,19 +605,18 @@ static unsigned int br_nf_forward_ip(void *priv, if (br_validate_ipv4(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - } - - if (pf == NFPROTO_IPV6) { + skb->protocol = htons(ETH_P_IP); + } else if (pf == NFPROTO_IPV6) { if (br_validate_ipv6(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + skb->protocol = htons(ETH_P_IPV6); + } else { + WARN_ON_ONCE(1); + return NF_DROP; } nf_bridge->physoutdev = skb->dev; - if (pf == NFPROTO_IPV4) - skb->protocol = htons(ETH_P_IP); - else - skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in, state->net), @@ -641,8 +625,7 @@ static unsigned int br_nf_forward_ip(void *priv, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(void *priv, - struct sk_buff *skb, +static unsigned int br_nf_forward_arp(struct sk_buff *skb, const struct nf_hook_state *state) { struct net_bridge_port *p; @@ -659,11 +642,8 @@ static unsigned int br_nf_forward_arp(void *priv, if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) return NF_ACCEPT; - if (!IS_ARP(skb)) { - if (!is_vlan_arp(skb, state->net)) - return NF_ACCEPT; + if (is_vlan_arp(skb, state->net)) nf_bridge_pull_encap_header(skb); - } if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); @@ -680,6 +660,28 @@ static unsigned int br_nf_forward_arp(void *priv, return NF_STOLEN; } +/* This is the 'purely bridged' case. For IP, we pass the packet to + * netfilter with indev and outdev set to the bridge device, + * but we are still able to filter on the 'real' indev/outdev + * because of the physdev module. For ARP, indev and outdev are the + * bridge ports. 
+ */ +static unsigned int br_nf_forward(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + if (IS_IP(skb) || is_vlan_ip(skb, state->net) || + is_pppoe_ip(skb, state->net)) + return br_nf_forward_ip(skb, state, NFPROTO_IPV4); + if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) + return br_nf_forward_ip(skb, state, NFPROTO_IPV6); + if (IS_ARP(skb) || is_vlan_arp(skb, state->net)) + return br_nf_forward_arp(skb, state); + + return NF_ACCEPT; +} + static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; @@ -937,13 +939,7 @@ static const struct nf_hook_ops br_nf_ops[] = { .priority = NF_BR_PRI_BRNF, }, { - .hook = br_nf_forward_ip, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_FORWARD, - .priority = NF_BR_PRI_BRNF - 1, - }, - { - .hook = br_nf_forward_arp, + .hook = br_nf_forward, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, -- cgit v1.2.3 From 643d1260366424412e8269caead410d333e3263f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 Oct 2023 14:38:15 +0200 Subject: netfilter: conntrack: switch connlabels to atomic_t The spinlock is back from the day when connabels did not have a fixed size and reallocation had to be supported. Remove it. This change also allows to call the helpers from softirq or timers without deadlocks. Also add WARN()s to catch refcounting imbalances. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 2 +- include/net/netns/conntrack.h | 2 +- net/netfilter/nf_conntrack_labels.c | 17 ++++++++--------- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index fcb19a4e8f2b..6903f72bcc15 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_LABELS struct net *net = nf_ct_net(ct); - if (net->ct.labels_used == 0) + if (atomic_read(&net->ct.labels_used) == 0) return NULL; return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 1f463b3957c7..bae914815aa3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -107,7 +107,7 @@ struct netns_ct { struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) - unsigned int labels_used; + atomic_t labels_used; #endif }; #endif diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 6e70e137a0a6..6c46aad23313 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -11,8 +11,6 @@ #include #include -static DEFINE_SPINLOCK(nf_connlabels_lock); - static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; @@ -60,23 +58,24 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace); int nf_connlabels_get(struct net *net, unsigned int bits) { + int v; + if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; - spin_lock(&nf_connlabels_lock); - net->ct.labels_used++; - spin_unlock(&nf_connlabels_lock); - BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); + v = atomic_inc_return_relaxed(&net->ct.labels_used); + WARN_ON_ONCE(v <= 0); + return 0; } 
EXPORT_SYMBOL_GPL(nf_connlabels_get); void nf_connlabels_put(struct net *net) { - spin_lock(&nf_connlabels_lock); - net->ct.labels_used--; - spin_unlock(&nf_connlabels_lock); + int v = atomic_dec_return_relaxed(&net->ct.labels_used); + + WARN_ON_ONCE(v < 0); } EXPORT_SYMBOL_GPL(nf_connlabels_put); -- cgit v1.2.3 From ff16111cc10c82ee065ffbd9fa8d6210394ff8c6 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:28 +0200 Subject: netfilter: nf_tables: Drop pointless memset in nf_tables_dump_obj The code does not make use of cb->args fields past the first one, no need to zero them. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 03a306d15f43..fa216d1cfb74 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7717,9 +7717,6 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (idx < s_idx) goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); if (filter && filter->table && strcmp(filter->table, table->name)) goto cont; -- cgit v1.2.3 From 4279cc60b354d2d2b970655a70a151cbfa1d958b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:29 +0200 Subject: netfilter: nf_tables: Unconditionally allocate nft_obj_filter Prep work for moving the filter into struct netlink_callback's scratch area. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fa216d1cfb74..e2e0586307f5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7717,11 +7717,9 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (idx < s_idx) goto cont; - if (filter && filter->table && - strcmp(filter->table, table->name)) + if (filter->table && strcmp(filter->table, table->name)) goto cont; - if (filter && - filter->type != NFT_OBJECT_UNSPEC && + if (filter->type != NFT_OBJECT_UNSPEC && obj->ops->type->type != filter->type) goto cont; @@ -7756,23 +7754,21 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) const struct nlattr * const *nla = cb->data; struct nft_obj_filter *filter = NULL; - if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) { - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); - if (!filter) - return -ENOMEM; + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); + if (!filter) + return -ENOMEM; - if (nla[NFTA_OBJ_TABLE]) { - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); - if (!filter->table) { - kfree(filter); - return -ENOMEM; - } + if (nla[NFTA_OBJ_TABLE]) { + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); + if (!filter->table) { + kfree(filter); + return -ENOMEM; } - - if (nla[NFTA_OBJ_TYPE]) - filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); } + if (nla[NFTA_OBJ_TYPE]) + filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + cb->data = filter; return 0; } @@ -7781,10 +7777,8 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_filter *filter = cb->data; - if (filter) { - kfree(filter->table); - kfree(filter); - } + kfree(filter->table); + kfree(filter); return 0; } -- cgit v1.2.3 From 
ecf49cad807061d880bea27a5da8e0114ddc7690 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:30 +0200 Subject: netfilter: nf_tables: A better name for nft_obj_filter Name it for what it is supposed to become, a real nft_obj_dump_ctx. No functional change intended. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e2e0586307f5..2b81069ea3f6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7681,7 +7681,7 @@ static void audit_log_obj_reset(const struct nft_table *table, kfree(buf); } -struct nft_obj_filter { +struct nft_obj_dump_ctx { char *table; u32 type; }; @@ -7691,7 +7691,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; - struct nft_obj_filter *filter = cb->data; + struct nft_obj_dump_ctx *ctx = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; @@ -7717,10 +7717,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (idx < s_idx) goto cont; - if (filter->table && strcmp(filter->table, table->name)) + if (ctx->table && strcmp(ctx->table, table->name)) goto cont; - if (filter->type != NFT_OBJECT_UNSPEC && - obj->ops->type->type != filter->type) + if (ctx->type != NFT_OBJECT_UNSPEC && + obj->ops->type->type != ctx->type) goto cont; rc = nf_tables_fill_obj_info(skb, net, @@ -7752,33 +7752,33 @@ cont: static int nf_tables_dump_obj_start(struct netlink_callback *cb) { const struct nlattr * const *nla = cb->data; - struct nft_obj_filter *filter = NULL; + struct nft_obj_dump_ctx *ctx = NULL; - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); - if (!filter) + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) return -ENOMEM; if (nla[NFTA_OBJ_TABLE]) { - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); - if (!filter->table) { - kfree(filter); + ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); + if (!ctx->table) { + kfree(ctx); return -ENOMEM; } } if (nla[NFTA_OBJ_TYPE]) - filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - cb->data = filter; + cb->data = ctx; return 0; } static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - struct nft_obj_filter *filter = cb->data; + struct nft_obj_dump_ctx *ctx = cb->data; - kfree(filter->table); - kfree(filter); + kfree(ctx->table); + kfree(ctx); return 0; } -- cgit v1.2.3 From 2eda95cfa2fc43bcb21a801dc1d16a0b7cc73860 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:31 +0200 Subject: netfilter: nf_tables: Carry s_idx in nft_obj_dump_ctx Prep work for moving the context into struct netlink_callback scratch area. 
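For readers skimming the flattened diff below, the resume pattern being consolidated looks roughly like the following condensed sketch. It is an illustration only: fill_one() and the object list are placeholders, not the real nf_tables iteration, and at this point in the series the context is still reached through cb->data.

	struct nft_obj_dump_ctx {
		unsigned int	s_idx;	/* first object not yet emitted */
		char		*table;
		u32		type;
	};

	static int nf_tables_dump_obj_sketch(struct sk_buff *skb,
					     struct netlink_callback *cb)
	{
		struct nft_obj_dump_ctx *ctx = cb->data;	/* moves to cb->ctx later in the series */
		struct nft_object *obj;
		unsigned int idx = 0;

		list_for_each_entry(obj, &table_objects, list) {	/* table_objects: placeholder */
			if (idx < ctx->s_idx)
				goto cont;			/* already sent in an earlier pass */
			if (fill_one(skb, obj) < 0)		/* fill_one(): placeholder */
				break;				/* skb full; retry this entry next call */
	cont:
			idx++;
		}

		ctx->s_idx = idx;	/* resume point for the next dump pass */
		return skb->len;
	}

Keeping the resume index in the dump context rather than in cb->args[0] is what lets the whole context migrate into the callback scratch area in the next patch.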
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b81069ea3f6..3585ddd99ef8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7682,6 +7682,7 @@ static void audit_log_obj_reset(const struct nft_table *table, } struct nft_obj_dump_ctx { + unsigned int s_idx; char *table; u32 type; }; @@ -7689,14 +7690,14 @@ struct nft_obj_dump_ctx { static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_table *table; - unsigned int idx = 0, s_idx = cb->args[0]; struct nft_obj_dump_ctx *ctx = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; + const struct nft_table *table; unsigned int entries = 0; struct nft_object *obj; + unsigned int idx = 0; bool reset = false; int rc = 0; @@ -7715,7 +7716,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(obj, &table->objects, list) { if (!nft_is_active(net, obj)) goto cont; - if (idx < s_idx) + if (idx < ctx->s_idx) goto cont; if (ctx->table && strcmp(ctx->table, table->name)) goto cont; @@ -7745,7 +7746,7 @@ cont: } rcu_read_unlock(); - cb->args[0] = idx; + ctx->s_idx = idx; return skb->len; } -- cgit v1.2.3 From 5a893b9cdf6fa5758f43d323a1d7fa6d1bf489ff Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:32 +0200 Subject: netfilter: nf_tables: nft_obj_filter fits into cb->ctx No need to allocate it if one may just use struct netlink_callback's scratch area for it. 
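The idiom here is a compile-time size check that lets a dumper reuse the callback's fixed scratch area instead of allocating its own state, which also removes an -ENOMEM failure path. A self-contained userspace analogy (structure names and the 48-byte size are illustrative, not a claim about the kernel definitions):

	#include <stddef.h>
	#include <string.h>

	/* Stand-in for struct netlink_callback: a small fixed scratch buffer. */
	struct callback {
		unsigned char ctx[48];
	};

	struct obj_dump_ctx {
		unsigned int s_idx;
		char *table;
		unsigned int type;
	};

	/* Same idea as the BUILD_BUG_ON() in the patch: refuse to compile if
	 * the context ever outgrows the scratch area it is squeezed into.
	 */
	_Static_assert(sizeof(struct obj_dump_ctx) <=
		       sizeof(((struct callback *)0)->ctx),
		       "dump context must fit into the callback scratch area");

	static struct obj_dump_ctx *obj_dump_ctx(struct callback *cb)
	{
		return (struct obj_dump_ctx *)cb->ctx;	/* no kzalloc(), no -ENOMEM path */
	}

	int main(void)
	{
		struct callback cb;

		memset(&cb, 0, sizeof(cb));
		obj_dump_ctx(&cb)->type = 1;
		return obj_dump_ctx(&cb)->type == 1 ? 0 : 1;
	}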
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3585ddd99ef8..c84e2cc6d3b3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7690,7 +7690,7 @@ struct nft_obj_dump_ctx { static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct nft_obj_dump_ctx *ctx = cb->data; + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; @@ -7752,34 +7752,28 @@ cont: static int nf_tables_dump_obj_start(struct netlink_callback *cb) { + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; - struct nft_obj_dump_ctx *ctx = NULL; - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); if (nla[NFTA_OBJ_TABLE]) { ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); + if (!ctx->table) return -ENOMEM; - } } if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - cb->data = ctx; return 0; } static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - struct nft_obj_dump_ctx *ctx = cb->data; + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; kfree(ctx->table); - kfree(ctx); return 0; } -- cgit v1.2.3 From a552339063d37b3b1133d9dfc31f851edafb27bb Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 20 Oct 2023 19:34:33 +0200 Subject: netfilter: nf_tables: Carry reset boolean in nft_obj_dump_ctx Relieve the dump callback from having to inspect nlmsg_type upon each call, just do it once at start of the dump. 
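In short, the nlmsg_type check moves from the per-pass dump callback into the one-shot ->start() callback, roughly as in this sketch (not the literal kernel code):

	static int nf_tables_dump_obj_start_sketch(struct netlink_callback *cb)
	{
		struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;

		/* inspected once; later dump passes only read ctx->reset */
		ctx->reset = NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET;
		return 0;
	}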
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c84e2cc6d3b3..ecb251f6c6a6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7685,6 +7685,7 @@ struct nft_obj_dump_ctx { unsigned int s_idx; char *table; u32 type; + bool reset; }; static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) @@ -7698,12 +7699,8 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) unsigned int entries = 0; struct nft_object *obj; unsigned int idx = 0; - bool reset = false; int rc = 0; - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; - rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); @@ -7730,7 +7727,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) NFT_MSG_NEWOBJ, NLM_F_MULTI | NLM_F_APPEND, table->family, table, - obj, reset); + obj, ctx->reset); if (rc < 0) break; @@ -7739,7 +7736,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) cont: idx++; } - if (reset && entries) + if (ctx->reset && entries) audit_log_obj_reset(table, nft_net->base_seq, entries); if (rc < 0) break; @@ -7766,6 +7763,9 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + ctx->reset = true; + return 0; } -- cgit v1.2.3 From 26cec9d4144eb23c45cd5c033d5c141f04d61a9c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Oct 2023 22:20:10 +0200 Subject: netfilter: nft_set_pipapo: no need to call pipapo_deactivate() from flush Use the element object that is already offered instead. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 75a9dee353e2..bea63aa2df4b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1814,8 +1814,9 @@ static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e = elem; - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); + + return true; } /** -- cgit v1.2.3 From 6509a2e410c3cb36c78a0a85c6102debe171337e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Oct 2023 22:20:23 +0200 Subject: netfilter: nf_tables: set backend .flush always succeeds .flush is always successful since this results from iterating over the set elements to toggle mark the element as inactive in the next generation. 
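Because the flattened diff below is hard to read, here is nft_setelem_flush() as it looks after this patch, reconstructed from the visible hunks (context lines elided by the diff are filled in from the follow-up patch later in this series). With ->flush() unable to fail, the -ENOENT unwind path disappears and the transaction allocation remains the only failure case:

	static int nft_setelem_flush(const struct nft_ctx *ctx,
				     struct nft_set *set,
				     const struct nft_set_iter *iter,
				     struct nft_set_elem *elem)
	{
		struct nft_trans *trans;

		trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
					    sizeof(struct nft_trans_elem), GFP_ATOMIC);
		if (!trans)
			return -ENOMEM;

		set->ops->flush(ctx->net, set, elem->priv);	/* void now, cannot fail */
		set->ndeact++;

		nft_setelem_data_deactivate(ctx->net, set, elem);
		nft_trans_elem_set(trans) = set;
		nft_trans_elem(trans) = *elem;
		nft_trans_commit_list_add_tail(ctx->net, trans);

		return 0;
	}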
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 9 +-------- net/netfilter/nft_set_bitmap.c | 4 +--- net/netfilter/nft_set_hash.c | 7 ++----- net/netfilter/nft_set_pipapo.c | 4 +--- net/netfilter/nft_set_rbtree.c | 4 +--- 6 files changed, 7 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8de040d2d2cf..d0f5c477c254 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -449,7 +449,7 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, void *priv); void (*remove)(const struct net *net, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ecb251f6c6a6..35db40857bc6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7146,17 +7146,13 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, struct nft_set_elem *elem) { struct nft_trans *trans; - int err; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if (!trans) return -ENOMEM; - if (!set->ops->flush(ctx->net, set, elem->priv)) { - err = -ENOENT; - goto err1; - } + set->ops->flush(ctx->net, set, elem->priv); set->ndeact++; nft_setelem_data_deactivate(ctx->net, set, elem); @@ -7165,9 +7161,6 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err1: - kfree(trans); - return err; } static int __nft_set_catchall_flush(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 1e5e7a181e0b..2ee6e3672b41 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -174,7 +174,7 @@ static void nft_bitmap_activate(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static bool nft_bitmap_flush(const struct net *net, +static void nft_bitmap_flush(const struct net *net, const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); @@ -186,8 +186,6 @@ static bool nft_bitmap_flush(const struct net *net, /* Enter 10 state, similar to deactivation. 
*/ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - - return true; } static void *nft_bitmap_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 2013de934cef..e758b887ad86 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -192,14 +192,12 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_rhash_flush(const struct net *net, +static void nft_rhash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rhash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - - return true; } static void *nft_rhash_deactivate(const struct net *net, @@ -590,13 +588,12 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_hash_flush(const struct net *net, +static void nft_hash_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; nft_set_elem_change_active(net, set, &he->ext); - return true; } static void *nft_hash_deactivate(const struct net *net, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index bea63aa2df4b..dba073aa9ad6 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1809,14 +1809,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, void *elem) { struct nft_pipapo_elem *e = elem; nft_set_elem_change_active(net, set, &e->ext); - - return true; } /** diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 7d1004f9e7d2..60ff591eb265 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -535,14 +535,12 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_flush(const struct net *net, +static void nft_rbtree_flush(const struct net *net, const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; nft_set_elem_change_active(net, set, &rbe->ext); - - return true; } static void *nft_rbtree_deactivate(const struct net *net, -- cgit v1.2.3 From 9dad402b89e81a0516bad5e0ac009b7a0a80898f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Oct 2023 22:23:07 +0200 Subject: netfilter: nf_tables: expose opaque set element as struct nft_elem_priv Add placeholder structure and place it at the beginning of each struct nft_*_elem for each existing set backend, instead of exposing elements as void type to the frontend which defeats compiler type checks. Use this pointer to this new type to replace void *. This patch updates the following set backend API to use this new struct nft_elem_priv placeholder structure: - update - deactivate - flush - get as well as the following helper functions: - nft_set_elem_ext() - nft_set_elem_init() - nft_set_elem_destroy() - nf_tables_set_elem_destroy() This patch adds nft_elem_priv_cast() to cast struct nft_elem_priv to native element representation from the corresponding set backend. BUILD_BUG_ON() makes sure this .priv placeholder is always at the top of the opaque set element representation. 
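The underlying pattern is a generic C idiom, independent of nf_tables: an empty "tag" struct placed first in every concrete object gives the frontend a typed handle without exposing the backend layout, and a compile-time offset check keeps the pointer casts valid. A self-contained illustration with made-up names (not kernel code; the empty struct relies on the GNU C extension the kernel itself uses):

	#include <stddef.h>
	#include <stdio.h>
	#include <string.h>

	/* Opaque, typed handle the "frontend" passes around. */
	struct elem_priv { };

	/* One concrete backend representation; the handle must come first so
	 * that &elem->priv and the element itself share the same address.
	 */
	struct hash_elem {
		struct elem_priv priv;
		unsigned int key;
		char name[16];
	};

	_Static_assert(offsetof(struct hash_elem, priv) == 0,
		       "handle must stay at the start of the element");

	/* Equivalent of nft_elem_priv_cast(): recover the backend view. */
	static struct hash_elem *hash_elem_cast(struct elem_priv *priv)
	{
		return (struct hash_elem *)priv;
	}

	int main(void)
	{
		struct hash_elem e = { .key = 7 };
		struct elem_priv *handle = &e.priv;	/* what the frontend stores */

		strcpy(e.name, "demo");
		printf("%u %s\n", hash_elem_cast(handle)->key,
		       hash_elem_cast(handle)->name);
		return 0;
	}

Keeping the handle at offset zero means the cast is a plain pointer reinterpretation; the BUILD_BUG_ON() added to each backend's init path in the diff below enforces that invariant at build time.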
Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 38 ++++++++++++------- net/netfilter/nf_tables_api.c | 27 +++++++------ net/netfilter/nft_dynset.c | 23 +++++------ net/netfilter/nft_set_bitmap.c | 35 +++++++++-------- net/netfilter/nft_set_hash.c | 80 ++++++++++++++++++++++----------------- net/netfilter/nft_set_pipapo.c | 41 ++++++++++++-------- net/netfilter/nft_set_pipapo.h | 4 +- net/netfilter/nft_set_rbtree.c | 46 ++++++++++++---------- 8 files changed, 173 insertions(+), 121 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d0f5c477c254..d287a778be65 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -274,6 +274,9 @@ struct nft_userdata { unsigned char data[]; }; +/* placeholder structure for opaque set element backend representation. */ +struct nft_elem_priv { }; + /** * struct nft_set_elem - generic representation of set elements * @@ -294,9 +297,14 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - void *priv; + struct nft_elem_priv *priv; }; +static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) +{ + return (void *)priv; +} + struct nft_set; struct nft_set_iter { u8 genmask; @@ -430,7 +438,8 @@ struct nft_set_ops { const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, @@ -446,19 +455,19 @@ struct nft_set_ops { void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct net *net, + struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); void (*flush)(const struct net *net, const struct nft_set *set, - void *priv); + struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); - void * (*get)(const struct net *net, + struct nft_elem_priv * (*get)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); @@ -796,9 +805,9 @@ static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, - void *elem) + const struct nft_elem_priv *elem_priv) { - return elem + set->ops->elemsize; + return (void *)elem_priv + set->ops->elemsize; } static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) @@ -810,16 +819,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr); -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, const u32 *data, - u64 timeout, u64 expiration, gfp_t gfp); +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const 
struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr); void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem); + const struct nft_set *set, + const struct nft_elem_priv *elem_priv); struct nft_expr_ops; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 35db40857bc6..2ae81452113a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -601,7 +601,7 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx, struct nft_set_elem_catchall { struct list_head list; struct rcu_head rcu; - void *elem; + struct nft_elem_priv *elem; }; static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, @@ -6218,10 +6218,11 @@ static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id, return 0; } -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, - const u32 *data, u64 timeout, u64 expiration, gfp_t gfp) +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp) { struct nft_set_ext *ext; void *elem; @@ -6286,10 +6287,11 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } /* Drop references and destroy. Called from gc, dynset and abort path. */ -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx ctx = { .net = read_pnet(&set->net), .family = set->table->family, @@ -6300,10 +6302,10 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); - if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); - kfree(elem); + + kfree(elem_priv); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -6311,14 +6313,15 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy); * path via nft_setelem_data_deactivate(). 
*/ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) + const struct nft_set *set, + const struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); - kfree(elem); + kfree(elem_priv); } int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 5c5cc01c73c5..b18a79039125 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, return 0; } -static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, - struct nft_regs *regs) +static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, + const struct nft_expr *expr, + struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set_ext *ext; + void *elem_priv; u64 timeout; - void *elem; if (!atomic_add_unless(&set->nelems, 1, set->size)) return NULL; timeout = priv->timeout ? : set->timeout; - elem = nft_set_elem_init(set, &priv->tmpl, - ®s->data[priv->sreg_key], NULL, - ®s->data[priv->sreg_data], - timeout, 0, GFP_ATOMIC); - if (IS_ERR(elem)) + elem_priv = nft_set_elem_init(set, &priv->tmpl, + ®s->data[priv->sreg_key], NULL, + ®s->data[priv->sreg_data], + timeout, 0, GFP_ATOMIC); + if (IS_ERR(elem_priv)) goto err1; - ext = nft_set_elem_ext(set, elem); + ext = nft_set_elem_ext(set, elem_priv); if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0) goto err2; - return elem; + return elem_priv; err2: - nft_set_elem_destroy(set, elem, false); + nft_set_elem_destroy(set, elem_priv, false); err1: if (set->size) atomic_dec(&set->nelems); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 2ee6e3672b41..a320e7614aaa 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -13,6 +13,7 @@ #include struct nft_bitmap_elem { + struct nft_elem_priv priv; struct list_head head; struct nft_set_ext ext; }; @@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } -static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { const struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -116,7 +118,7 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, !nft_set_elem_active(&be->ext, genmask)) continue; - return be; + return &be->priv; } return ERR_PTR(-ENOENT); } @@ -125,8 +127,8 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -148,8 +150,8 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); struct nft_bitmap *priv = nft_set_priv(set); - struct 
nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -163,8 +165,8 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -175,11 +177,12 @@ static void nft_bitmap_activate(const struct net *net, } static void nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *_be) + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); - struct nft_bitmap_elem *be = _be; u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); @@ -188,12 +191,12 @@ static void nft_bitmap_flush(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static void *nft_bitmap_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -207,7 +210,7 @@ static void *nft_bitmap_deactivate(const struct net *net, priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - return be; + return &be->priv; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -224,7 +227,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&be->ext, iter->genmask)) goto cont; - elem.priv = be; + elem.priv = &be->priv; iter->err = iter->fn(ctx, set, iter, &elem); @@ -263,6 +266,8 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); @@ -276,7 +281,7 @@ static void nft_bitmap_destroy(const struct nft_ctx *ctx, struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nf_tables_set_elem_destroy(ctx, set, be); + nf_tables_set_elem_destroy(ctx, set, &be->priv); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e758b887ad86..0691565caa81 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -27,6 +27,7 @@ struct nft_rhash { }; struct nft_rhash_elem { + struct nft_elem_priv priv; struct rhash_head node; struct nft_set_ext ext; }; @@ -95,8 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } -static void *nft_rhash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -108,13 +110,14 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set, he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he 
!= NULL) - return he; + return &he->priv; return ERR_PTR(-ENOENT); } static bool nft_rhash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *regs), const struct nft_expr *expr, @@ -123,6 +126,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *prev; + struct nft_elem_priv *elem_priv; struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -133,10 +137,11 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, if (he != NULL) goto out; - he = new(set, expr, regs); - if (he == NULL) + elem_priv = new(set, expr, regs); + if (!elem_priv) goto err1; + he = nft_elem_priv_cast(elem_priv); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) @@ -144,7 +149,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); he = prev; } @@ -154,7 +159,7 @@ out: return true; err2: - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); err1: return false; @@ -164,8 +169,8 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, @@ -187,22 +192,23 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, static void nft_rhash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rhash_elem *he = elem->priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &he->ext); } static void nft_rhash_flush(const struct net *net, - const struct nft_set *set, void *priv) + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } -static void *nft_rhash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rhash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -219,15 +225,15 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_unlock(); - return he; + return &he->priv; } static void nft_rhash_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } @@ -278,7 +284,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; + elem.priv = &he->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -404,6 +410,8 @@ static int 
nft_rhash_init(const struct nft_set *set, struct rhashtable_params params = nft_rhash_params; int err; + BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0); + params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; @@ -426,8 +434,9 @@ struct nft_rhash_ctx { static void nft_rhash_elem_destroy(void *ptr, void *arg) { struct nft_rhash_ctx *rhash_ctx = arg; + struct nft_rhash_elem *he = ptr; - nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv); } static void nft_rhash_destroy(const struct nft_ctx *ctx, @@ -474,6 +483,7 @@ struct nft_hash { }; struct nft_hash_elem { + struct nft_elem_priv priv; struct hlist_node node; struct nft_set_ext ext; }; @@ -499,8 +509,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } -static void *nft_hash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -512,7 +523,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set, hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && nft_set_elem_active(&he->ext, genmask)) - return he; + return &he->priv; } return ERR_PTR(-ENOENT); } @@ -562,7 +573,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { - struct nft_hash_elem *this = elem->priv, *he; + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 hash; @@ -583,25 +594,26 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, static void nft_hash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &he->ext); } static void nft_hash_flush(const struct net *net, - const struct nft_set *set, void *priv) + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_hash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *this = elem->priv, *he; u8 genmask = nft_genmask_next(net); u32 hash; @@ -611,7 +623,7 @@ static void *nft_hash_deactivate(const struct net *net, set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); - return he; + return &he->priv; } } return NULL; @@ -621,7 +633,7 @@ static void nft_hash_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); 
hlist_del_rcu(&he->node); } @@ -641,7 +653,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; + elem.priv = &he->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -682,7 +694,7 @@ static void nft_hash_destroy(const struct nft_ctx *ctx, for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nf_tables_set_elem_destroy(ctx, set, he); + nf_tables_set_elem_destroy(ctx, set, &he->priv); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dba073aa9ad6..0969d2cb637b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -599,11 +599,18 @@ out: * @elem: nftables API element representation containing key data * @flags: Unused */ -static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + static struct nft_pipapo_elem *e; + + e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net)); + if (IS_ERR(e)) + return ERR_CAST(e); + + return &e->priv; } /** @@ -1162,10 +1169,10 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo_elem *e = elem->priv, *dup; struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); + struct nft_pipapo_elem *e, *dup; struct nft_pipapo_field *f; const u8 *start_p, *end_p; int i, bsize_max, err = 0; @@ -1263,6 +1270,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, put_cpu_ptr(m->scratch); } + e = nft_elem_priv_cast(elem->priv); *ext2 = &e->ext; pipapo_map(m, rulemap, e); @@ -1541,7 +1549,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, { struct nft_set_elem elem = { - .priv = e, + .priv = &e->priv, }; nft_setelem_data_deactivate(net, set, &elem); @@ -1742,7 +1750,7 @@ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_pipapo_elem *e = elem->priv; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &e->ext); } @@ -1782,9 +1790,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, * * Return: deactivated element if found, NULL otherwise. */ -static void *nft_pipapo_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -1810,9 +1818,9 @@ static void *nft_pipapo_deactivate(const struct net *net, * Return: true if element was found and deactivated. 
*/ static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, - void *elem) + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &e->ext); } @@ -1949,10 +1957,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - struct nft_pipapo_elem *e = elem->priv; int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; const u8 *data; + e = nft_elem_priv_cast(elem->priv); data = (const u8 *)nft_set_ext_key(&e->ext); while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { @@ -2039,7 +2048,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; - elem.priv = e; + elem.priv = &e->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) @@ -2113,6 +2122,8 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo_field *f; int err, i, field_count; + BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0); + field_count = desc->field_count ? : 1; if (field_count > NFT_PIPAPO_MAX_FIELDS) @@ -2207,7 +2218,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, e = f->mt[r].e; - nf_tables_set_elem_destroy(ctx, set, e); + nf_tables_set_elem_destroy(ctx, set, &e->priv); } } diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 2e164a319945..1040223da5fa 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -170,10 +170,12 @@ struct nft_pipapo_elem; /** * struct nft_pipapo_elem - API-facing representation of single set element + * @priv: element placeholder * @ext: nftables API extensions */ struct nft_pipapo_elem { - struct nft_set_ext ext; + struct nft_elem_priv priv; + struct nft_set_ext ext; }; int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 60ff591eb265..475f22568342 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -23,6 +23,7 @@ struct nft_rbtree { }; struct nft_rbtree_elem { + struct nft_elem_priv priv; struct rb_node node; struct nft_set_ext ext; }; @@ -196,8 +197,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, return false; } -static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); @@ -208,16 +210,17 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); if (ret || !read_seqcount_retry(&priv->count, seq)) - return rbe; + return &rbe->priv; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); - if (!ret) - rbe = ERR_PTR(-ENOENT); read_unlock_bh(&priv->lock); - return rbe; + if (!ret) + return ERR_PTR(-ENOENT); + + return &rbe->priv; } static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, @@ -225,7 +228,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, struct 
nft_rbtree_elem *rbe) { struct nft_set_elem elem = { - .priv = rbe, + .priv = &rbe->priv, }; lockdep_assert_held_write(&priv->lock); @@ -487,8 +490,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; int err; do { @@ -520,8 +523,8 @@ static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; nft_rbtree_erase(priv, rbe); } @@ -530,26 +533,27 @@ static void nft_rbtree_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_rbtree_elem *rbe = elem->priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); nft_set_elem_change_active(net, set, &rbe->ext); } static void nft_rbtree_flush(const struct net *net, - const struct nft_set *set, void *priv) + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &rbe->ext); } -static void *nft_rbtree_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; - struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_next(net); int d; @@ -577,8 +581,8 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_left; continue; } - nft_rbtree_flush(net, set, rbe); - return rbe; + nft_rbtree_flush(net, set, &rbe->priv); + return &rbe->priv; } } return NULL; @@ -602,7 +606,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; - elem.priv = rbe; + elem.priv = &rbe->priv; iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { @@ -702,6 +706,8 @@ static int nft_rbtree_init(const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0); + rwlock_init(&priv->lock); seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; @@ -719,7 +725,7 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx, while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nf_tables_set_elem_destroy(ctx, set, rbe); + nf_tables_set_elem_destroy(ctx, set, &rbe->priv); } } -- cgit v1.2.3 From 0e1ea651c9717ddcd8e0648d8468477a31867b0a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Oct 2023 14:29:27 +0200 Subject: netfilter: nf_tables: shrink memory consumption of set elements Instead of copying struct nft_set_elem into struct nft_trans_elem, store the pointer to the opaque set element object in the transaction. Adapt set backend API (and set backend implementations) to take the pointer to opaque set element representation whenever required. 
This patch deconstifies .remove() and .activate() set backend API since these modify the set element opaque object. And it also constify nft_set_elem_ext() this provides access to the nft_set_ext struct without updating the object. According to pahole on x86_64, this patch shrinks struct nft_trans_elem size from 216 to 24 bytes. This patch also reduces stack memory consumption by removing the template struct nft_set_elem object, using the opaque set element object instead such as from the set iterator API, catchall elements and the get element command. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++--- net/netfilter/nf_tables_api.c | 166 +++++++++++++++++--------------------- net/netfilter/nft_set_bitmap.c | 16 ++-- net/netfilter/nft_set_hash.c | 26 +++--- net/netfilter/nft_set_pipapo.c | 25 +++--- net/netfilter/nft_set_rbtree.c | 25 ++---- 6 files changed, 116 insertions(+), 160 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d287a778be65..b63f35fb2a99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -314,7 +314,7 @@ struct nft_set_iter { int (*fn)(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); }; /** @@ -454,7 +454,7 @@ struct nft_set_ops { struct nft_set_ext **ext); void (*activate)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); @@ -463,7 +463,7 @@ struct nft_set_ops { struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); @@ -1073,7 +1073,7 @@ struct nft_chain { int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); @@ -1650,14 +1650,14 @@ struct nft_trans_table { struct nft_trans_elem { struct nft_set *set; - struct nft_set_elem elem; + struct nft_elem_priv *elem_priv; bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem(trans) \ - (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_priv(trans) \ + (((struct nft_trans_elem *)trans->data)->elem_priv) #define nft_trans_elem_set_bound(trans) \ (((struct nft_trans_elem *)trans->data)->bound) @@ -1698,7 +1698,7 @@ struct nft_trans_gc { struct nft_set *set; u32 seq; u16 count; - void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; @@ -1721,7 +1721,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int __init nft_chain_filter_init(void); void 
nft_chain_filter_fini(void); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2ae81452113a..79b0ed92367a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -591,9 +591,9 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, static int nft_mapelem_deactivate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); return 0; } @@ -609,7 +609,6 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { @@ -617,8 +616,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - nft_setelem_data_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, catchall->elem); break; } } @@ -3807,9 +3805,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx *pctx = (struct nft_ctx *)ctx; const struct nft_data *data; int err; @@ -3839,7 +3837,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -3848,8 +3845,7 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = nft_setelem_validate(ctx, set, NULL, &elem); + ret = nft_setelem_validate(ctx, set, NULL, catchall->elem); if (ret < 0) return ret; } @@ -5301,9 +5297,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, static int nft_setelem_data_validate(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); @@ -5316,9 +5312,9 @@ static int nft_setelem_data_validate(const struct nft_ctx *ctx, static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - return nft_setelem_data_validate(ctx, set, elem); + return nft_setelem_data_validate(ctx, set, elem_priv); } static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, @@ -5326,7 +5322,6 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -5335,8 +5330,7 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = 
nft_setelem_data_validate(ctx, set, &elem); + ret = nft_setelem_data_validate(ctx, set, catchall->elem); if (ret < 0) break; } @@ -5403,14 +5397,14 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); static int nft_mapelem_activate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - nft_setelem_data_activate(ctx->net, set, elem); + nft_setelem_data_activate(ctx->net, set, elem_priv); return 0; } @@ -5420,7 +5414,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { @@ -5428,8 +5421,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - nft_setelem_data_activate(ctx->net, set, &elem); + nft_setelem_data_activate(ctx->net, set, catchall->elem); break; } } @@ -5608,10 +5600,10 @@ nla_put_failure: static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, bool reset) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -5697,16 +5689,16 @@ struct nft_set_dump_args { static int nf_tables_dump_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_set_dump_args *args; if (nft_set_elem_expired(ext)) return 0; args = container_of(iter, struct nft_set_dump_args, iter); - return nf_tables_fill_setelem(args->skb, set, elem, args->reset); + return nf_tables_fill_setelem(args->skb, set, elem_priv, args->reset); } static void audit_log_nft_set_reset(const struct nft_table *table, @@ -5731,7 +5723,6 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -5741,8 +5732,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, nft_set_elem_expired(ext)) continue; - elem.priv = catchall->elem; - ret = nf_tables_fill_setelem(skb, set, &elem, reset); + ret = nf_tables_fill_setelem(skb, set, catchall->elem, reset); if (reset && !ret) audit_log_nft_set_reset(set->table, base_seq, 1); break; @@ -5867,7 +5857,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, bool reset) { struct nlmsghdr *nlh; @@ -5889,7 +5879,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - err = nf_tables_fill_setelem(skb, set, elem, reset); + err = nf_tables_fill_setelem(skb, set, elem_priv, reset); if (err < 0) goto nla_put_failure; @@ -6039,7 +6029,7 @@ 
static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, - NFT_MSG_NEWSETELEM, 0, set, &elem, + NFT_MSG_NEWSETELEM, 0, set, elem.priv, reset); if (err < 0) goto err_fill_setelem; @@ -6122,7 +6112,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, int event) { struct nftables_pernet *nft_net; @@ -6143,7 +6133,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, - set, elem, false); + set, elem_priv, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6456,9 +6446,9 @@ static int nft_setelem_insert(const struct net *net, } static bool nft_setelem_is_catchall(const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL) @@ -6468,14 +6458,14 @@ static bool nft_setelem_is_catchall(const struct nft_set *set, } static void nft_setelem_activate(struct net *net, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - if (nft_setelem_is_catchall(set, elem)) { + if (nft_setelem_is_catchall(set, elem_priv)) { nft_set_elem_change_active(net, set, ext); } else { - set->ops->activate(net, set, elem); + set->ops->activate(net, set, elem_priv); } } @@ -6533,12 +6523,12 @@ static int nft_setelem_deactivate(const struct net *net, static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_set_elem_catchall *catchall, *next; list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { - if (catchall->elem == elem->priv) { + if (catchall->elem == elem_priv) { list_del_rcu(&catchall->list); kfree_rcu(catchall, rcu); break; @@ -6548,12 +6538,12 @@ static void nft_setelem_catchall_remove(const struct net *net, static void nft_setelem_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - if (nft_setelem_is_catchall(set, elem)) - nft_setelem_catchall_remove(net, set, elem); + if (nft_setelem_is_catchall(set, elem_priv)) + nft_setelem_catchall_remove(net, set, elem_priv); else - set->ops->remove(net, set, elem); + set->ops->remove(net, set, elem_priv); } static bool nft_setelem_valid_key_end(const struct nft_set *set, @@ -6921,12 +6911,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } } - nft_trans_elem(trans) = elem; + nft_trans_elem_priv(trans) = elem.priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_set_full: - nft_setelem_remove(ctx->net, set, &elem); + nft_setelem_remove(ctx->net, set, elem.priv); err_element_clash: kfree(trans); err_elem_free: @@ -7027,9 +7017,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem) + 
struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); @@ -7039,9 +7029,9 @@ static void nft_setelem_data_activate(const struct net *net, void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); @@ -7126,9 +7116,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto fail_ops; - nft_setelem_data_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, elem.priv); - nft_trans_elem(trans) = elem; + nft_trans_elem_priv(trans) = elem.priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7146,7 +7136,7 @@ fail_elem: static int nft_setelem_flush(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_trans *trans; @@ -7155,12 +7145,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - set->ops->flush(ctx->net, set, elem->priv); + set->ops->flush(ctx->net, set, elem_priv); set->ndeact++; - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; - nft_trans_elem(trans) = *elem; + nft_trans_elem_priv(trans) = elem_priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7168,7 +7158,7 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, static int __nft_set_catchall_flush(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_trans *trans; @@ -7177,9 +7167,9 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; - nft_trans_elem(trans) = *elem; + nft_trans_elem_priv(trans) = elem_priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7190,7 +7180,6 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -7199,8 +7188,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = __nft_set_catchall_flush(ctx, set, &elem); + ret = __nft_set_catchall_flush(ctx, set, catchall->elem); if (ret < 0) break; nft_set_elem_change_active(ctx->net, set, ext); @@ -9278,7 +9266,7 @@ static void nft_commit_release(struct nft_trans *trans) case NFT_MSG_DESTROYSETELEM: nf_tables_set_elem_destroy(&trans->ctx, nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: @@ -9507,16 +9495,12 @@ void nft_chain_del(struct nft_chain *chain) static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, struct nft_trans_gc *trans) { - void **priv = trans->priv; + struct nft_elem_priv **priv = 
trans->priv; unsigned int i; for (i = 0; i < trans->count; i++) { - struct nft_set_elem elem = { - .priv = priv[i], - }; - - nft_setelem_data_deactivate(ctx->net, trans->set, &elem); - nft_setelem_remove(ctx->net, trans->set, &elem); + nft_setelem_data_deactivate(ctx->net, trans->set, priv[i]); + nft_setelem_remove(ctx->net, trans->set, priv[i]); } } @@ -9529,7 +9513,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans) static void nft_trans_gc_trans_free(struct rcu_head *rcu) { - struct nft_set_elem elem = {}; + struct nft_elem_priv *elem_priv; struct nft_trans_gc *trans; struct nft_ctx ctx = {}; unsigned int i; @@ -9538,11 +9522,11 @@ static void nft_trans_gc_trans_free(struct rcu_head *rcu) ctx.net = read_pnet(&trans->set->net); for (i = 0; i < trans->count; i++) { - elem.priv = trans->priv[i]; - if (!nft_setelem_is_catchall(trans->set, &elem)) + elem_priv = trans->priv[i]; + if (!nft_setelem_is_catchall(trans->set, elem_priv)) atomic_dec(&trans->set->nelems); - nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + nf_tables_set_elem_destroy(&ctx, trans->set, elem_priv); } nft_trans_gc_destroy(trans); @@ -10110,9 +10094,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_setelem_activate(net, te->set, &te->elem); + nft_setelem_activate(net, te->set, te->elem_priv); nf_tables_setelem_notify(&trans->ctx, te->set, - &te->elem, + te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && list_empty(&te->set->pending_update)) { @@ -10126,10 +10110,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te = (struct nft_trans_elem *)trans->data; nf_tables_setelem_notify(&trans->ctx, te->set, - &te->elem, + te->elem_priv, trans->msg_type); - nft_setelem_remove(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) { + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) { atomic_dec(&te->set->nelems); te->set->ndeact--; } @@ -10249,7 +10233,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); @@ -10396,8 +10380,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); if (te->set->ops->abort && @@ -10410,9 +10394,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_setelem_data_activate(net, te->set, &te->elem); - nft_setelem_activate(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_data_activate(net, te->set, te->elem_priv); + nft_setelem_activate(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) te->set->ndeact--; if (te->set->ops->abort && @@ -10588,9 +10572,9 @@ static int nft_check_loops(const struct nft_ctx *ctx, static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + 
struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index a320e7614aaa..963edb514641 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -146,11 +146,10 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, return 0; } -static void nft_bitmap_remove(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -163,9 +162,9 @@ static void nft_bitmap_remove(const struct net *net, static void nft_bitmap_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_bitmap_elem *be = nft_elem_priv_cast(elem->priv); + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -219,7 +218,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, { const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; - struct nft_set_elem elem; list_for_each_entry_rcu(be, &priv->list, head) { if (iter->count < iter->skip) @@ -227,9 +225,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&be->ext, iter->genmask)) goto cont; - elem.priv = &be->priv; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &be->priv); if (iter->err < 0) return; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 0691565caa81..e6c00891e334 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -190,9 +190,9 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, } static void nft_rhash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } @@ -230,9 +230,9 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set, static void nft_rhash_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); struct nft_rhash *priv = nft_set_priv(set); rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); @@ -264,7 +264,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; struct rhashtable_iter hti; - struct nft_set_elem elem; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); @@ -284,9 +283,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) 
goto cont; - elem.priv = &he->priv; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) break; @@ -592,9 +589,9 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, } static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } @@ -631,9 +628,9 @@ nft_hash_deactivate(const struct net *net, const struct nft_set *set, static void nft_hash_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = nft_elem_priv_cast(elem->priv); + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); hlist_del_rcu(&he->node); } @@ -643,7 +640,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; - struct nft_set_elem elem; int i; for (i = 0; i < priv->buckets; i++) { @@ -653,9 +649,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = &he->priv; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) return; cont: diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 0969d2cb637b..f540c2be0caa 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1548,11 +1548,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, struct nft_pipapo_elem *e) { - struct nft_set_elem elem = { - .priv = &e->priv, - }; - - nft_setelem_data_deactivate(net, set, &elem); + nft_setelem_data_deactivate(net, set, &e->priv); } /** @@ -1739,7 +1735,7 @@ static void nft_pipapo_abort(const struct nft_set *set) * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently * in use for lookups, and not directly inserted into current lookup data. 
Both @@ -1748,9 +1744,9 @@ static void nft_pipapo_abort(const struct nft_set *set) */ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = nft_elem_priv_cast(elem->priv); + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &e->ext); } @@ -1803,7 +1799,7 @@ nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * This is functionally the same as nft_pipapo_deactivate(), with a slightly * different interface, and it's also called once for each element in a set @@ -1945,7 +1941,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * nft_pipapo_remove() - Remove element given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * Similarly to nft_pipapo_activate(), this is used as commit operation by the * API, but it's called once per element in the pending transaction, so we can't @@ -1953,7 +1949,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * the matched element here, if any, and commit the updated matching data. */ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; @@ -1961,7 +1957,7 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, struct nft_pipapo_elem *e; const u8 *data; - e = nft_elem_priv_cast(elem->priv); + e = nft_elem_priv_cast(elem_priv); data = (const u8 *)nft_set_ext_key(&e->ext); while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { @@ -2038,7 +2034,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, for (r = 0; r < f->rules; r++) { struct nft_pipapo_elem *e; - struct nft_set_elem elem; if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) continue; @@ -2048,9 +2043,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; - elem.priv = &e->priv; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 475f22568342..25baa9cdb77d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -227,12 +227,8 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { - struct nft_set_elem elem = { - .priv = &rbe->priv, - }; - lockdep_assert_held_write(&priv->lock); - nft_setelem_data_deactivate(net, set, &elem); + nft_setelem_data_deactivate(net, set, &rbe->priv); rb_erase(&rbe->node, &priv->root); } @@ -521,9 +517,9 @@ static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rb static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = 
nft_elem_priv_cast(elem->priv); + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); struct nft_rbtree *priv = nft_set_priv(set); nft_rbtree_erase(priv, rbe); @@ -531,9 +527,9 @@ static void nft_rbtree_remove(const struct net *net, static void nft_rbtree_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &rbe->ext); } @@ -594,7 +590,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; - struct nft_set_elem elem; struct rb_node *node; read_lock_bh(&priv->lock); @@ -606,9 +601,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; - elem.priv = &rbe->priv; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &rbe->priv); if (iter->err < 0) { read_unlock_bh(&priv->lock); return; @@ -623,11 +616,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { - struct nft_set_elem elem = { - .priv = rbe, - }; - - nft_setelem_data_deactivate(net, set, &elem); + nft_setelem_data_deactivate(net, set, &rbe->priv); nft_rbtree_erase(priv, rbe); } -- cgit v1.2.3 From 078996fcd657e6e0c3a72b7d5806d04c32e74250 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Oct 2023 22:23:35 +0200 Subject: netfilter: nf_tables: set->ops->insert returns opaque set element in case of EEXIST Return struct nft_elem_priv instead of struct nft_set_ext for consistency with ("netfilter: nf_tables: expose opaque set element as struct nft_elem_priv") and to prepare the introduction of element timeout updates from control path. 
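The calling convention after this change can be condensed as follows; this is an illustrative sketch distilled from the hunks below, not verbatim kernel code, and my_backend_insert()/my_backend_find() are hypothetical names standing in for any set backend:

	/* Backend side: on a clash, hand back the existing element as an
	 * opaque struct nft_elem_priv rather than exposing its nft_set_ext.
	 */
	static int my_backend_insert(const struct net *net,
				     const struct nft_set *set,
				     const struct nft_set_elem *elem,
				     struct nft_elem_priv **elem_priv)
	{
		struct my_backend_elem *dup = my_backend_find(net, set, elem);

		if (dup) {
			*elem_priv = &dup->priv;	/* opaque handle only */
			return -EEXIST;
		}
		/* ... link the new element into the backend ... */
		return 0;
	}

	/* Core side: derive the extension itself, only when it needs it. */
	err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags);
	if (err == -EEXIST)
		ext2 = nft_set_elem_ext(set, elem_priv);
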
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 17 ++++++++++------- net/netfilter/nft_set_bitmap.c | 4 ++-- net/netfilter/nft_set_hash.c | 8 ++++---- net/netfilter/nft_set_pipapo.c | 10 +++++----- net/netfilter/nft_set_rbtree.c | 10 +++++----- 6 files changed, 27 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b63f35fb2a99..3bbd13ab1ecf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -451,7 +451,7 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext); + struct nft_elem_priv **priv); void (*activate)(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 79b0ed92367a..ed3329fcbe7f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6406,7 +6406,7 @@ EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **pext) + struct nft_elem_priv **priv) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_next(net); @@ -6415,7 +6415,7 @@ static int nft_setelem_catchall_insert(const struct net *net, list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (nft_set_elem_active(ext, genmask)) { - *pext = ext; + *priv = catchall->elem; return -EEXIST; } } @@ -6433,14 +6433,15 @@ static int nft_setelem_catchall_insert(const struct net *net, static int nft_setelem_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext, unsigned int flags) + struct nft_elem_priv **elem_priv, + unsigned int flags) { int ret; if (flags & NFT_SET_ELEM_CATCHALL) - ret = nft_setelem_catchall_insert(net, set, elem, ext); + ret = nft_setelem_catchall_insert(net, set, elem, elem_priv); else - ret = set->ops->insert(net, set, elem, ext); + ret = set->ops->insert(net, set, elem, elem_priv); return ret; } @@ -6576,13 +6577,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_elem_priv *elem_priv; struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; - u64 timeout; u64 expiration; + u64 timeout; int err, i; u8 ulen; @@ -6875,9 +6877,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext->genmask = nft_genmask_cur(ctx->net); - err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags); + err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags); if (err) { if (err == -EEXIST) { + ext2 = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 963edb514641..32df7a16835d 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -125,7 +125,7 @@ nft_bitmap_get(const struct net *net, const struct nft_set *set, static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem 
*elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); @@ -134,7 +134,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, be = nft_bitmap_elem_find(set, new, genmask); if (be) { - *ext = &be->ext; + *elem_priv = &be->priv; return -EEXIST; } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e6c00891e334..6c2061bfdae6 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,7 +167,7 @@ err1: static int nft_rhash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); @@ -183,7 +183,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, if (IS_ERR(prev)) return PTR_ERR(prev); if (prev) { - *ext = &prev->ext; + *elem_priv = &prev->priv; return -EEXIST; } return 0; @@ -568,7 +568,7 @@ static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); @@ -580,7 +580,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && nft_set_elem_active(&he->ext, genmask)) { - *ext = &he->ext; + *elem_priv = &he->priv; return -EEXIST; } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f540c2be0caa..701977af3ee8 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1158,13 +1158,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, * @net: Network namespace * @set: nftables API set representation * @elem: nftables API element representation containing key data - * @ext2: Filled with pointer to &struct nft_set_ext in inserted element + * @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element * * Return: 0 on success, error pointer on failure. 
*/ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext2) + struct nft_elem_priv **elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; @@ -1195,7 +1195,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -EEXIST; } @@ -1210,7 +1210,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) != -ENOENT) { if (IS_ERR(dup)) return PTR_ERR(dup); - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -ENOTEMPTY; } @@ -1271,7 +1271,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } e = nft_elem_priv_cast(elem->priv); - *ext2 = &e->ext; + *elem_priv = &e->priv; pipapo_map(m, rulemap, e); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 25baa9cdb77d..6f1186abd47b 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -306,7 +306,7 @@ static bool nft_rbtree_update_first(const struct nft_set *set, static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; struct rb_node *node, *next, *parent, **p, *first = NULL; @@ -423,7 +423,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { - *ext = &rbe_ge->ext; + *elem_priv = &rbe_ge->priv; return -EEXIST; } @@ -432,7 +432,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { - *ext = &rbe_le->ext; + *elem_priv = &rbe_le->priv; return -EEXIST; } @@ -484,7 +484,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); @@ -498,7 +498,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); - err = __nft_rbtree_insert(net, set, rbe, ext); + err = __nft_rbtree_insert(net, set, rbe, elem_priv); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } while (err == -EAGAIN); -- cgit v1.2.3 From 9cdee063476988102bbc5e0e9551e10c5ed00d3e Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 24 Oct 2023 15:10:40 +0200 Subject: netfilter: nf_tables: Carry reset boolean in nft_set_dump_ctx Relieve the dump callback from having to check nlmsg_type upon each call. Prep work for set element reset locking. 
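In effect the nlmsg_type check now happens once when the request is parsed, and the dump path only reads a stored flag; roughly (a condensed sketch of the hunks below, not a verbatim excerpt):

	struct nft_set_dump_ctx {
		const struct nft_set	*set;
		struct nft_ctx		ctx;
		bool			reset;	/* decided once per request */
	};

	/* nf_tables_getsetelem(): classify the message type a single time */
	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
		reset = true;

	struct nft_set_dump_ctx dump_ctx = {
		.set	= set,
		.ctx	= ctx,
		.reset	= reset,
	};

	/* nf_tables_dump_set(): consume the stored decision */
	args.reset = dump_ctx->reset;
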
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ed3329fcbe7f..3c1fd8283bf4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5715,6 +5715,7 @@ static void audit_log_nft_set_reset(const struct nft_table *table, struct nft_set_dump_ctx { const struct nft_set *set; struct nft_ctx ctx; + bool reset; }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, @@ -5752,7 +5753,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; - bool reset = false; u32 portid, seq; int event; @@ -5800,12 +5800,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; - args.cb = cb; args.skb = skb; - args.reset = reset; + args.reset = dump_ctx->reset; args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; @@ -5815,11 +5812,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (!args.iter.err && args.iter.count == cb->args[0]) args.iter.err = nft_set_catchall_dump(net, skb, set, - reset, cb->seq); + dump_ctx->reset, cb->seq); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); - if (reset && args.iter.count > args.iter.skip) + if (dump_ctx->reset && args.iter.count > args.iter.skip) audit_log_nft_set_reset(table, cb->seq, args.iter.count - args.iter.skip); @@ -6072,6 +6069,9 @@ static int nf_tables_getsetelem(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, @@ -6082,6 +6082,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct nft_set_dump_ctx dump_ctx = { .set = set, .ctx = ctx, + .reset = reset, }; c.data = &dump_ctx; @@ -6091,9 +6092,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; - nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&ctx, set, attr, reset); if (err < 0) { -- cgit v1.2.3 From 1d0507f46843b14b0cb051fe50ebc7e6432111ab Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 23 Oct 2023 11:17:07 -0700 Subject: net: mptcp: convert netlink from small_ops to ops in the current MPTCP control plane, all operations use a netlink attribute of the same type "MPTCP_PM_ATTR". However, add/del/get/flush operations only parse the first element in the message _ the one that describes MPTCP endpoints (that was named MPTCP_PM_ATTR_ADDR and mostly used in ADD_ADDR operations _ probably the similarity of "attr", "addr" and "add" might cause some confusion to human readers). Convert MPTCP from 'small_ops' to 'ops', thus allowing different attributes for each single operation, hopefully makes all this clearer to human readers. - use a separate attribute set for add/del/get/flush address operation, binary compatible with the existing one, to store the endpoint address. 
MPTCP_PM_ENDPOINT_ADDR is added to the uAPI (with the same value as MPTCP_PM_ATTR_ADDR) for these operations. - convert mptcp_pm_ops[] and add policy files accordingly. this prepares MPTCP control plane to be described as YAML spec. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/340 Acked-by: Paolo Abeni Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-1-v2-3-16b1f701f900@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 8 ++ net/mptcp/pm_netlink.c | 191 ++++++++++++++++++++++++++++++--------------- 2 files changed, 135 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index ee9c49f949a2..0e62937ab17c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -65,6 +65,14 @@ enum { #define MPTCP_PM_ATTR_MAX (__MPTCP_PM_ATTR_MAX - 1) +enum { + MPTCP_PM_ENDPOINT_ADDR = 1, + + __MPTCP_PM_ENDPOINT_MAX +}; + +#define MPTCP_PM_ENDPOINT_MAX (__MPTCP_PM_ENDPOINT_MAX - 1) + enum { MPTCP_PM_ADDR_ATTR_UNSPEC, diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9661f3812682..fd4e843505e5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -48,6 +48,60 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 +static +const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = { + [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16), + [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, }, +}; + +/* MPTCP_PM_CMD_ADD_ADDR / DEL / GET / FLUSH - do */ +static +const struct nla_policy mptcp_pm_endpoint_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_SET_LIMITS - do */ +static +const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { + [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_SET_FLAGS - do */ +static +const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_ANNOUNCE - do */ +static +const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_REMOVE - do */ +static +const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = { + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, +}; + +/* MPTCP_PM_CMD_SUBFLOW_CREATE / DESTROY - do */ +static +const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) { 
return net_generic(net, pm_nl_pernet_id); @@ -1104,29 +1158,6 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = { }, }; -static const struct nla_policy -mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = { - [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, - [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, - [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, - [MPTCP_PM_ADDR_ATTR_ADDR6] = - NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), - [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 }, - [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 }, - [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 }, -}; - -static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { - [MPTCP_PM_ATTR_ADDR] = - NLA_POLICY_NESTED(mptcp_pm_addr_policy), - [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, - [MPTCP_PM_ATTR_ADDR_REMOTE] = - NLA_POLICY_NESTED(mptcp_pm_addr_policy), -}; - void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); @@ -1188,7 +1219,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], /* no validation needed - was already done via nested policy */ err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, - mptcp_pm_addr_policy, info->extack); + mptcp_pm_address_nl_policy, info->extack); if (err) return err; @@ -1305,7 +1336,7 @@ next: static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; int ret; @@ -1486,7 +1517,7 @@ next: static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsigned int addr_max; @@ -1677,7 +1708,7 @@ nla_put_failure: static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; struct sk_buff *msg; @@ -2283,72 +2314,104 @@ nla_put_failure: nlmsg_free(skb); } -static const struct genl_small_ops mptcp_pm_ops[] = { +static const struct genl_ops mptcp_pm_ops[] = { { - .cmd = MPTCP_PM_CMD_ADD_ADDR, - .doit = mptcp_nl_cmd_add_addr, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_ADD_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_add_addr, + .policy = mptcp_pm_endpoint_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_DEL_ADDR, - .doit = mptcp_nl_cmd_del_addr, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_DEL_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_del_addr, + .policy = mptcp_pm_endpoint_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, - .doit = mptcp_nl_cmd_flush_addrs, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_GET_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = 
mptcp_nl_cmd_get_addr, + .dumpit = mptcp_nl_cmd_dump_addrs, + .policy = mptcp_pm_endpoint_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_GET_ADDR, - .doit = mptcp_nl_cmd_get_addr, - .dumpit = mptcp_nl_cmd_dump_addrs, + .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_flush_addrs, + .policy = mptcp_pm_endpoint_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_SET_LIMITS, - .doit = mptcp_nl_cmd_set_limits, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_SET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_set_limits, + .policy = mptcp_pm_set_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_GET_LIMITS, - .doit = mptcp_nl_cmd_get_limits, + .cmd = MPTCP_PM_CMD_GET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_get_limits, + .policy = mptcp_pm_set_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, }, { - .cmd = MPTCP_PM_CMD_SET_FLAGS, - .doit = mptcp_nl_cmd_set_flags, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_SET_FLAGS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_set_flags, + .policy = mptcp_pm_set_flags_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_ANNOUNCE, - .doit = mptcp_nl_cmd_announce, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_announce, + .policy = mptcp_pm_announce_nl_policy, + .maxattr = MPTCP_PM_ATTR_TOKEN, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_REMOVE, - .doit = mptcp_nl_cmd_remove, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_remove, + .policy = mptcp_pm_remove_nl_policy, + .maxattr = MPTCP_PM_ATTR_LOC_ID, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, - .doit = mptcp_nl_cmd_sf_create, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_sf_create, + .policy = mptcp_pm_subflow_create_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, }, { - .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, - .doit = mptcp_nl_cmd_sf_destroy, - .flags = GENL_UNS_ADMIN_PERM, + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_nl_cmd_sf_destroy, + .policy = mptcp_pm_subflow_create_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, }, }; static struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, - .maxattr = MPTCP_PM_ATTR_MAX, - .policy = mptcp_pm_policy, .netnsok = true, .module = THIS_MODULE, - .small_ops = mptcp_pm_ops, - .n_small_ops = ARRAY_SIZE(mptcp_pm_ops), + .ops = mptcp_pm_ops, + .n_ops = ARRAY_SIZE(mptcp_pm_ops), .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), -- cgit v1.2.3 From 1e07938e29c587eaae069f6c624daa4c2a56331c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 23 Oct 2023 11:17:10 -0700 Subject: net: mptcp: rename netlink handlers to mptcp_pm_nl__{doit,dumpit} so that they will match names generated from YAML spec. 
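For reference, the handlers are being aligned with the naming scheme ynl-gen-c.py emits for kernel stubs, i.e. <family>_nl_<op>_doit/_dumpit. The prototypes below are the ones that later appear in the generated mptcp_pm_gen.h and are reproduced here only to illustrate the convention:

	/* MPTCP_PM_CMD_ADD_ADDR -> */
	int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info);
	/* MPTCP_PM_CMD_GET_ADDR -> */
	int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info);
	int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *skb,
					struct netlink_callback *cb);
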
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/340 Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-1-v2-6-16b1f701f900@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 48 ++++++++++++++++++++++++------------------------ net/mptcp/pm_userspace.c | 8 ++++---- net/mptcp/protocol.h | 8 ++++---- 3 files changed, 32 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index fd4e843505e5..3fa9a364343f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1334,7 +1334,7 @@ next: return 0; } -static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1515,7 +1515,7 @@ next: return 0; } -static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1650,7 +1650,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) pernet->addrs = 0; } -static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); LIST_HEAD(free_list); @@ -1706,7 +1706,7 @@ nla_put_failure: return -EMSGSIZE; } -static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1756,8 +1756,8 @@ fail: return ret; } -static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, - struct netlink_callback *cb) +static int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1815,7 +1815,7 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) } static int -mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info) +mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); unsigned int rcv_addrs, subflows; @@ -1841,7 +1841,7 @@ unlock: } static int -mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info) +mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct sk_buff *msg; @@ -1950,7 +1950,7 @@ int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 return 0; } -static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; @@ -2314,11 +2314,11 @@ nla_put_failure: nlmsg_free(skb); } -static const struct genl_ops mptcp_pm_ops[] = { +static const struct genl_ops mptcp_pm_nl_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, .validate = GENL_DONT_VALIDATE_STRICT, - 
.doit = mptcp_nl_cmd_add_addr, + .doit = mptcp_pm_nl_add_addr_doit, .policy = mptcp_pm_endpoint_nl_policy, .maxattr = MPTCP_PM_ENDPOINT_ADDR, .flags = GENL_UNS_ADMIN_PERM, @@ -2326,7 +2326,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_DEL_ADDR, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_del_addr, + .doit = mptcp_pm_nl_del_addr_doit, .policy = mptcp_pm_endpoint_nl_policy, .maxattr = MPTCP_PM_ENDPOINT_ADDR, .flags = GENL_UNS_ADMIN_PERM, @@ -2334,8 +2334,8 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_GET_ADDR, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_get_addr, - .dumpit = mptcp_nl_cmd_dump_addrs, + .doit = mptcp_pm_nl_get_addr_doit, + .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_endpoint_nl_policy, .maxattr = MPTCP_PM_ENDPOINT_ADDR, .flags = GENL_UNS_ADMIN_PERM, @@ -2343,7 +2343,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_flush_addrs, + .doit = mptcp_pm_nl_flush_addrs_doit, .policy = mptcp_pm_endpoint_nl_policy, .maxattr = MPTCP_PM_ENDPOINT_ADDR, .flags = GENL_UNS_ADMIN_PERM, @@ -2351,7 +2351,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_SET_LIMITS, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_set_limits, + .doit = mptcp_pm_nl_set_limits_doit, .policy = mptcp_pm_set_limits_nl_policy, .maxattr = MPTCP_PM_ATTR_SUBFLOWS, .flags = GENL_UNS_ADMIN_PERM, @@ -2359,14 +2359,14 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_GET_LIMITS, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_get_limits, + .doit = mptcp_pm_nl_get_limits_doit, .policy = mptcp_pm_set_limits_nl_policy, .maxattr = MPTCP_PM_ATTR_SUBFLOWS, }, { .cmd = MPTCP_PM_CMD_SET_FLAGS, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_set_flags, + .doit = mptcp_pm_nl_set_flags_doit, .policy = mptcp_pm_set_flags_nl_policy, .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, .flags = GENL_UNS_ADMIN_PERM, @@ -2374,7 +2374,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ANNOUNCE, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_announce, + .doit = mptcp_pm_nl_announce_doit, .policy = mptcp_pm_announce_nl_policy, .maxattr = MPTCP_PM_ATTR_TOKEN, .flags = GENL_UNS_ADMIN_PERM, @@ -2382,7 +2382,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_REMOVE, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_remove, + .doit = mptcp_pm_nl_remove_doit, .policy = mptcp_pm_remove_nl_policy, .maxattr = MPTCP_PM_ATTR_LOC_ID, .flags = GENL_UNS_ADMIN_PERM, @@ -2390,7 +2390,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_sf_create, + .doit = mptcp_pm_nl_subflow_create_doit, .policy = mptcp_pm_subflow_create_nl_policy, .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, .flags = GENL_UNS_ADMIN_PERM, @@ -2398,7 +2398,7 @@ static const struct genl_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_nl_cmd_sf_destroy, + .doit = mptcp_pm_nl_subflow_destroy_doit, .policy = mptcp_pm_subflow_create_nl_policy, .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, .flags = GENL_UNS_ADMIN_PERM, @@ -2410,8 +2410,8 @@ static struct genl_family mptcp_genl_family __ro_after_init = { .version = MPTCP_PM_VER, .netnsok = true, .module = THIS_MODULE, - .ops = mptcp_pm_ops, - .n_ops 
= ARRAY_SIZE(mptcp_pm_ops), + .ops = mptcp_pm_nl_ops, + .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops), .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index d042d32beb4d..0f92e5b13a8a 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -145,7 +145,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); } -int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -208,7 +208,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } -int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; @@ -270,7 +270,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) return err; } -int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -394,7 +394,7 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, return NULL; } -int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3612545fa62e..4d6e40416f84 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -877,10 +877,10 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); -int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- cgit v1.2.3 From aab4d8564947f391674391e5c346d7f6f1c49f89 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 23 Oct 2023 11:17:11 -0700 Subject: net: mptcp: use policy generated by YAML spec generated with: $ ./tools/net/ynl/ynl-gen-c.py --mode kernel \ > --spec Documentation/netlink/specs/mptcp.yaml --source \ > -o net/mptcp/mptcp_pm_gen.c $ ./tools/net/ynl/ynl-gen-c.py --mode kernel \ > --spec Documentation/netlink/specs/mptcp.yaml --header \ > -o net/mptcp/mptcp_pm_gen.h Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/340 Acked-by: Paolo Abeni Signed-off-by: Davide 
Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-1-v2-7-16b1f701f900@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/Makefile | 3 +- net/mptcp/mptcp_pm_gen.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/mptcp_pm_gen.h | 58 +++++++++++++++ net/mptcp/pm_netlink.c | 165 +++---------------------------------------- net/mptcp/protocol.h | 6 +- 5 files changed, 250 insertions(+), 161 deletions(-) create mode 100644 net/mptcp/mptcp_pm_gen.c create mode 100644 net/mptcp/mptcp_pm_gen.h (limited to 'net') diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 84e531f86b82..bcf1dbf3a432 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,8 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o + mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \ + mptcp_pm_gen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c new file mode 100644 index 000000000000..a2325e70ddab --- /dev/null +++ b/net/mptcp/mptcp_pm_gen.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/mptcp.yaml */ +/* YNL-GEN kernel source */ + +#include +#include + +#include "mptcp_pm_gen.h" + +#include + +/* Common nested types */ +const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = { + [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16), + [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, }, +}; + +/* MPTCP_PM_CMD_ADD_ADDR - do */ +const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_DEL_ADDR - do */ +const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_GET_ADDR - do */ +const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_FLUSH_ADDRS - do */ +const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_SET_LIMITS - do */ +const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { + [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_GET_LIMITS - do */ +const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { + [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_SET_FLAGS - do */ +const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, 
}, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_ANNOUNCE - do */ +const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_REMOVE - do */ +const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = { + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, +}; + +/* MPTCP_PM_CMD_SUBFLOW_CREATE - do */ +const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_SUBFLOW_DESTROY - do */ +const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* Ops table for mptcp_pm */ +const struct genl_ops mptcp_pm_nl_ops[11] = { + { + .cmd = MPTCP_PM_CMD_ADD_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_add_addr_doit, + .policy = mptcp_pm_add_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_DEL_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_del_addr_doit, + .policy = mptcp_pm_del_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_GET_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_get_addr_doit, + .dumpit = mptcp_pm_nl_get_addr_dumpit, + .policy = mptcp_pm_get_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_flush_addrs_doit, + .policy = mptcp_pm_flush_addrs_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_set_limits_doit, + .policy = mptcp_pm_set_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_GET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_get_limits_doit, + .policy = mptcp_pm_get_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, + }, + { + .cmd = MPTCP_PM_CMD_SET_FLAGS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_set_flags_doit, + .policy = mptcp_pm_set_flags_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_announce_doit, + .policy = mptcp_pm_announce_nl_policy, + .maxattr = MPTCP_PM_ATTR_TOKEN, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_remove_doit, + .policy = mptcp_pm_remove_nl_policy, + .maxattr = MPTCP_PM_ATTR_LOC_ID, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_subflow_create_doit, + .policy = 
mptcp_pm_subflow_create_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_subflow_destroy_doit, + .policy = mptcp_pm_subflow_destroy_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, +}; diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h new file mode 100644 index 000000000000..10579d184587 --- /dev/null +++ b/net/mptcp/mptcp_pm_gen.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/mptcp.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_MPTCP_PM_GEN_H +#define _LINUX_MPTCP_PM_GEN_H + +#include +#include + +#include + +/* Common nested types */ +extern const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1]; + +extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1]; + +extern const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1]; + +extern const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +extern const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1]; + +extern const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1]; + +extern const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +extern const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +/* Ops table for mptcp_pm */ +extern const struct genl_ops mptcp_pm_nl_ops[11]; + +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, + struct genl_info *info); +int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, + struct genl_info *info); + +#endif /* _LINUX_MPTCP_PM_GEN_H */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3fa9a364343f..1529ec358815 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -48,60 +48,6 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 -static -const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = { - [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, - [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, - 
[MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, - [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16), - [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, }, - [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, }, - [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, }, -}; - -/* MPTCP_PM_CMD_ADD_ADDR / DEL / GET / FLUSH - do */ -static -const struct nla_policy mptcp_pm_endpoint_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { - [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), -}; - -/* MPTCP_PM_CMD_SET_LIMITS - do */ -static -const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { - [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, -}; - -/* MPTCP_PM_CMD_SET_FLAGS - do */ -static -const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { - [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), -}; - -/* MPTCP_PM_CMD_ANNOUNCE - do */ -static -const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { - [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, -}; - -/* MPTCP_PM_CMD_REMOVE - do */ -static -const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = { - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, -}; - -/* MPTCP_PM_CMD_SUBFLOW_CREATE / DESTROY - do */ -static -const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { - [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), -}; - static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) { return net_generic(net, pm_nl_pernet_id); @@ -1334,7 +1280,7 @@ next: return 0; } -static int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1515,7 +1461,7 @@ next: return 0; } -static int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1650,7 +1596,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) pernet->addrs = 0; } -static int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); LIST_HEAD(free_list); @@ -1706,7 +1652,7 @@ nla_put_failure: return -EMSGSIZE; } -static int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1756,8 +1702,8 @@ fail: return ret; } -static int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) { struct 
net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1814,8 +1760,7 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) return 0; } -static int -mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); unsigned int rcv_addrs, subflows; @@ -1840,8 +1785,7 @@ unlock: return ret; } -static int -mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct sk_buff *msg; @@ -1950,7 +1894,7 @@ int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 return 0; } -static int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; @@ -2314,97 +2258,6 @@ nla_put_failure: nlmsg_free(skb); } -static const struct genl_ops mptcp_pm_nl_ops[] = { - { - .cmd = MPTCP_PM_CMD_ADD_ADDR, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_add_addr_doit, - .policy = mptcp_pm_endpoint_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_DEL_ADDR, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_del_addr_doit, - .policy = mptcp_pm_endpoint_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_GET_ADDR, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_get_addr_doit, - .dumpit = mptcp_pm_nl_get_addr_dumpit, - .policy = mptcp_pm_endpoint_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_flush_addrs_doit, - .policy = mptcp_pm_endpoint_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_SET_LIMITS, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_set_limits_doit, - .policy = mptcp_pm_set_limits_nl_policy, - .maxattr = MPTCP_PM_ATTR_SUBFLOWS, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_GET_LIMITS, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_get_limits_doit, - .policy = mptcp_pm_set_limits_nl_policy, - .maxattr = MPTCP_PM_ATTR_SUBFLOWS, - }, - { - .cmd = MPTCP_PM_CMD_SET_FLAGS, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_set_flags_doit, - .policy = mptcp_pm_set_flags_nl_policy, - .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_ANNOUNCE, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_announce_doit, - .policy = mptcp_pm_announce_nl_policy, - .maxattr = MPTCP_PM_ATTR_TOKEN, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_REMOVE, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_remove_doit, - .policy = mptcp_pm_remove_nl_policy, - .maxattr = MPTCP_PM_ATTR_LOC_ID, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_subflow_create_doit, - .policy = mptcp_pm_subflow_create_nl_policy, - .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, - .flags = 
GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, - .validate = GENL_DONT_VALIDATE_STRICT, - .doit = mptcp_pm_nl_subflow_destroy_doit, - .policy = mptcp_pm_subflow_create_nl_policy, - .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, - .flags = GENL_UNS_ADMIN_PERM, - }, -}; - static struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4d6e40416f84..c4c05afdc48c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -13,6 +13,8 @@ #include #include +#include "mptcp_pm_gen.h" + #define MPTCP_SUPPORTED_VERSION 1 /* MPTCP option bits */ @@ -877,10 +879,6 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); -int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- cgit v1.2.3 From bd07063dd11f6fda903802a5868960be3690d327 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:41 -0700 Subject: net: don't use input buffer of __dev_alloc_name() as a scratch space Callers of __dev_alloc_name() want to pass dev->name as the output buffer. Make __dev_alloc_name() not clobber that buffer on failure, and remove the workarounds in callers. dev_alloc_name_ns() is now completely unnecessary. The extra strscpy() added here will be gone by the end of the patch series. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1025dc79bc49..874c7daa81f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1057,7 +1057,7 @@ EXPORT_SYMBOL(dev_valid_name); * __dev_alloc_name - allocate a name for a device * @net: network namespace to allocate the device name in * @name: name format string - * @buf: scratch buffer and result name string + * @res: result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -1068,13 +1068,14 @@ EXPORT_SYMBOL(dev_valid_name); * Returns the number of the unit assigned or a negative errno code. 
*/ -static int __dev_alloc_name(struct net *net, const char *name, char *buf) +static int __dev_alloc_name(struct net *net, const char *name, char *res) { int i = 0; const char *p; const int max_netdevices = 8*PAGE_SIZE; unsigned long *inuse; struct net_device *d; + char buf[IFNAMSIZ]; if (!dev_valid_name(name)) return -EINVAL; @@ -1124,8 +1125,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } snprintf(buf, IFNAMSIZ, name, i); - if (!netdev_name_in_use(net, buf)) + if (!netdev_name_in_use(net, buf)) { + strscpy(res, buf, IFNAMSIZ); return i; + } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -1154,20 +1157,6 @@ static int dev_prep_valid_name(struct net *net, struct net_device *dev, return 0; } -static int dev_alloc_name_ns(struct net *net, - struct net_device *dev, - const char *name) -{ - char buf[IFNAMSIZ]; - int ret; - - BUG_ON(!net); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strscpy(dev->name, buf, IFNAMSIZ); - return ret; -} - /** * dev_alloc_name - allocate a name for a device * @dev: device @@ -1184,20 +1173,14 @@ static int dev_alloc_name_ns(struct net *net, int dev_alloc_name(struct net_device *dev, const char *name) { - return dev_alloc_name_ns(dev_net(dev), dev, name); + return __dev_alloc_name(dev_net(dev), name, dev->name); } EXPORT_SYMBOL(dev_alloc_name); static int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - char buf[IFNAMSIZ]; - int ret; - - ret = dev_prep_valid_name(net, dev, name, buf); - if (ret >= 0) - strscpy(dev->name, buf, IFNAMSIZ); - return ret; + return dev_prep_valid_name(net, dev, name, dev->name); } /** -- cgit v1.2.3 From 556c755a4d8143007c745b6ad894611a04173b53 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:42 -0700 Subject: net: make dev_alloc_name() call dev_prep_valid_name() __dev_alloc_name() handles both the sprintf and non-sprintf target names. This complicates the code. dev_prep_valid_name() already handles the non-sprintf case, before calling __dev_alloc_name(), make the only other caller also go thru dev_prep_valid_name(). This way we can drop the non-sprintf handling in __dev_alloc_name() in one of the next changes. commit 55a5ec9b7710 ("Revert "net: core: dev_get_valid_name is now the same as dev_alloc_name_ns"") and commit 029b6d140550 ("Revert "net: core: maybe return -EEXIST in __dev_alloc_name"") tell us that we can't start returning -EEXIST from dev_alloc_name() on name duplicates. Bite the bullet and pass the expected errno to dev_prep_valid_name(). dev_prep_valid_name() must now propagate out the allocated id for printf names. 
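As a purely illustrative aside (not part of this patch), the calling convention that drivers keep relying on can be sketched like this: a negative return from dev_alloc_name() is an errno (-ENFILE rather than -EEXIST when a "%d" template runs out of unit ids), while a value >= 0 is the unit id that was allocated. The "foo%d" template and the example_* identifier below are made up for the sketch, and error/locking handling is trimmed to the essentials.

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Hypothetical probe helper, only to show how the return value is consumed. */
static int example_alloc_named_netdev(void)
{
        struct net_device *dev;
        int unit, err;

        dev = alloc_etherdev(0);
        if (!dev)
                return -ENOMEM;

        /* "foo%d" is a printf-style template; on success the chosen unit id
         * is returned, on exhaustion a negative errno.
         */
        unit = dev_alloc_name(dev, "foo%d");
        if (unit < 0) {
                free_netdev(dev);
                return unit;
        }

        err = register_netdev(dev);
        if (err)
                free_netdev(dev);
        return err;
}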
Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 874c7daa81f5..004e9f26b160 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1137,19 +1137,18 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) return -ENFILE; } +/* Returns negative errno or allocated unit id (see __dev_alloc_name()) */ static int dev_prep_valid_name(struct net *net, struct net_device *dev, - const char *want_name, char *out_name) + const char *want_name, char *out_name, + int dup_errno) { - int ret; - if (!dev_valid_name(want_name)) return -EINVAL; if (strchr(want_name, '%')) { - ret = __dev_alloc_name(net, want_name, out_name); - return ret < 0 ? ret : 0; + return __dev_alloc_name(net, want_name, out_name); } else if (netdev_name_in_use(net, want_name)) { - return -EEXIST; + return -dup_errno; } else if (out_name != want_name) { strscpy(out_name, want_name, IFNAMSIZ); } @@ -1173,14 +1172,17 @@ static int dev_prep_valid_name(struct net *net, struct net_device *dev, int dev_alloc_name(struct net_device *dev, const char *name) { - return __dev_alloc_name(dev_net(dev), name, dev->name); + return dev_prep_valid_name(dev_net(dev), dev, name, dev->name, ENFILE); } EXPORT_SYMBOL(dev_alloc_name); static int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - return dev_prep_valid_name(net, dev, name, dev->name); + int ret; + + ret = dev_prep_valid_name(net, dev, name, dev->name, EEXIST); + return ret < 0 ? ret : 0; } /** @@ -11118,7 +11120,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, /* We get here if we can't use the current device name */ if (!pat) goto out; - err = dev_prep_valid_name(net, dev, pat, new_name); + err = dev_prep_valid_name(net, dev, pat, new_name, EEXIST); if (err < 0) goto out; } -- cgit v1.2.3 From 9a810468126c846299d867f73dd7053064c29be1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:43 -0700 Subject: net: reduce indentation of __dev_alloc_name() All callers of __dev_valid_name() go thru dev_prep_valid_name() which handles the non-printf case. Focus __dev_alloc_name() on the sprintf case, remove the indentation level. Minor functional change of returning -EINVAL if % is not found, which should now never happen. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 56 ++++++++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 004e9f26b160..bbfb02b4a228 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1080,50 +1080,46 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) if (!dev_valid_name(name)) return -EINVAL; + /* Verify the string as this thing may have come from the user. + * There must be one "%d" and no other "%" characters. + */ p = strchr(name, '%'); - if (p) { - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters. 
- */ - if (p[1] != 'd' || strchr(p + 2, '%')) - return -EINVAL; - - /* Use one page as a bit array of possible slots */ - inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); - if (!inuse) - return -ENOMEM; + if (!p || p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; - for_each_netdev(net, d) { - struct netdev_name_node *name_node; + /* Use one page as a bit array of possible slots */ + inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); + if (!inuse) + return -ENOMEM; - netdev_for_each_altname(d, name_node) { - if (!sscanf(name_node->name, name, &i)) - continue; - if (i < 0 || i >= max_netdevices) - continue; + for_each_netdev(net, d) { + struct netdev_name_node *name_node; - /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, name_node->name, IFNAMSIZ)) - __set_bit(i, inuse); - } - if (!sscanf(d->name, name, &i)) + netdev_for_each_altname(d, name_node) { + if (!sscanf(name_node->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; - /* avoid cases where sscanf is not exact inverse of printf */ + /* avoid cases where sscanf is not exact inverse of printf */ snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, d->name, IFNAMSIZ)) + if (!strncmp(buf, name_node->name, IFNAMSIZ)) __set_bit(i, inuse); } + if (!sscanf(d->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; - i = find_first_zero_bit(inuse, max_netdevices); - bitmap_free(inuse); + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, d->name, IFNAMSIZ)) + __set_bit(i, inuse); } + i = find_first_zero_bit(inuse, max_netdevices); + bitmap_free(inuse); + snprintf(buf, IFNAMSIZ, name, i); if (!netdev_name_in_use(net, buf)) { strscpy(res, buf, IFNAMSIZ); -- cgit v1.2.3 From 7ad17b04dc7bdcdd1f85e460c38da55b0afa2422 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:44 -0700 Subject: net: trust the bitmap in __dev_alloc_name() Prior to restructuring __dev_alloc_name() handled both printf and non-printf names. In a clever attempt at code reuse it always prints the name into a buffer and checks if it's a duplicate. Trust the bitmap, and return an error if its full. This shrinks the possible ID space by one from 32K to 32K - 1, as previously the max value would have been tried as a valid ID. It seems very unlikely that anyone would care as we heard no requests to increase the max beyond 32k. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bbfb02b4a228..d2698b4bbad4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1119,18 +1119,11 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) i = find_first_zero_bit(inuse, max_netdevices); bitmap_free(inuse); + if (i == max_netdevices) + return -ENFILE; - snprintf(buf, IFNAMSIZ, name, i); - if (!netdev_name_in_use(net, buf)) { - strscpy(res, buf, IFNAMSIZ); - return i; - } - - /* It is possible to run out of possible slots - * when the name is long and there isn't enough space left - * for the digits, or if all bits are used. 
- */ - return -ENFILE; + snprintf(res, IFNAMSIZ, name, i); + return i; } /* Returns negative errno or allocated unit id (see __dev_alloc_name()) */ -- cgit v1.2.3 From 70e1b14c1bcbbb0854311ff8bed6cf4db75d5f05 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:45 -0700 Subject: net: remove dev_valid_name() check from __dev_alloc_name() __dev_alloc_name() is only called by dev_prep_valid_name(), which already checks that name is valid. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d2698b4bbad4..0830f2967221 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1077,9 +1077,6 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) struct net_device *d; char buf[IFNAMSIZ]; - if (!dev_valid_name(name)) - return -EINVAL; - /* Verify the string as this thing may have come from the user. * There must be one "%d" and no other "%" characters. */ -- cgit v1.2.3 From ce4cfa2318afcd74cc41992e306a28fa04e5d484 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2023 08:23:46 -0700 Subject: net: remove else after return in dev_prep_valid_name() Remove unnecessary else clauses after return. I copied this if / else construct from somewhere, it makes the code harder to read. Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231023152346.3639749-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0830f2967221..a37a932a3e14 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1131,14 +1131,13 @@ static int dev_prep_valid_name(struct net *net, struct net_device *dev, if (!dev_valid_name(want_name)) return -EINVAL; - if (strchr(want_name, '%')) { + if (strchr(want_name, '%')) return __dev_alloc_name(net, want_name, out_name); - } else if (netdev_name_in_use(net, want_name)) { + + if (netdev_name_in_use(net, want_name)) return -dup_errno; - } else if (out_name != want_name) { + if (out_name != want_name) strscpy(out_name, want_name, IFNAMSIZ); - } - return 0; } -- cgit v1.2.3 From 6ca80638b90cec66547011ee1ef79e534589989a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 23 Oct 2023 11:17:28 -0700 Subject: net: dsa: Use conduit and user terms Use more inclusive terms throughout the DSA subsystem by moving away from "master" which is replaced by "conduit" and "slave" which is replaced by "user". No functional changes. 
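As a hedged illustration of the renamed driver-facing surface (not part of this patch), a no-op tagging protocol sketch using the new field name mentioned in the updated dsa.rst; every example_* identifier is invented for the sketch and the ops struct is intentionally left unregistered.

#include <linux/skbuff.h>
#include <net/dsa.h>

static struct sk_buff *example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        /* a real tagger would insert its switch-specific header here */
        return skb;
}

static struct sk_buff *example_rcv(struct sk_buff *skb, struct net_device *dev)
{
        /* a real tagger would strip the header and pick the user port here */
        return skb;
}

static const struct dsa_device_ops example_tag_ops = {
        .name   = "example",
        .proto  = DSA_TAG_PROTO_NONE,
        .xmit   = example_xmit,
        .rcv    = example_rcv,
        /* was .promisc_on_master before this rename */
        .promisc_on_conduit = true,
};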
Acked-by: Rob Herring Acked-by: Stephen Hemminger Reviewed-by: Vladimir Oltean Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20231023181729.1191071-2-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/mediatek,mt7530.yaml | 2 +- Documentation/networking/dsa/b53.rst | 14 +- Documentation/networking/dsa/bcm_sf2.rst | 2 +- Documentation/networking/dsa/configuration.rst | 100 +- Documentation/networking/dsa/dsa.rst | 162 +- Documentation/networking/dsa/lan9303.rst | 2 +- Documentation/networking/dsa/sja1105.rst | 6 +- .../boot/dts/marvell/armada-3720-espressobin.dtsi | 2 +- drivers/net/dsa/b53/b53_common.c | 4 +- drivers/net/dsa/b53/b53_mdio.c | 2 +- drivers/net/dsa/bcm_sf2.c | 41 +- drivers/net/dsa/bcm_sf2.h | 2 +- drivers/net/dsa/bcm_sf2_cfp.c | 4 +- drivers/net/dsa/lan9303-core.c | 4 +- drivers/net/dsa/lantiq_gswip.c | 34 +- drivers/net/dsa/microchip/ksz9477.c | 6 +- drivers/net/dsa/microchip/ksz_common.c | 20 +- drivers/net/dsa/microchip/ksz_ptp.c | 2 +- drivers/net/dsa/mt7530.c | 18 +- drivers/net/dsa/mv88e6xxx/chip.c | 4 +- drivers/net/dsa/ocelot/felix.c | 68 +- drivers/net/dsa/ocelot/felix.h | 6 +- drivers/net/dsa/qca/qca8k-8xxx.c | 50 +- drivers/net/dsa/qca/qca8k-common.c | 4 +- drivers/net/dsa/qca/qca8k-leds.c | 6 +- drivers/net/dsa/qca/qca8k.h | 2 +- drivers/net/dsa/realtek/realtek-smi.c | 28 +- drivers/net/dsa/realtek/realtek.h | 2 +- drivers/net/dsa/realtek/rtl8365mb.c | 2 +- drivers/net/dsa/sja1105/sja1105_main.c | 4 +- drivers/net/dsa/xrs700x/xrs700x.c | 12 +- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 +- include/linux/dsa/sja1105.h | 2 +- include/net/dsa.h | 56 +- include/net/dsa_stubs.h | 22 +- net/core/dev_ioctl.c | 2 +- net/dsa/Makefile | 6 +- net/dsa/conduit.c | 475 +++ net/dsa/conduit.h | 22 + net/dsa/dsa.c | 224 +- net/dsa/dsa.h | 12 +- net/dsa/master.c | 475 --- net/dsa/master.h | 22 - net/dsa/netlink.c | 14 +- net/dsa/port.c | 124 +- net/dsa/port.h | 4 +- net/dsa/slave.c | 3727 -------------------- net/dsa/slave.h | 69 - net/dsa/switch.c | 20 +- net/dsa/switch.h | 8 +- net/dsa/tag.c | 10 +- net/dsa/tag.h | 26 +- net/dsa/tag_8021q.c | 22 +- net/dsa/tag_8021q.h | 2 +- net/dsa/tag_ar9331.c | 4 +- net/dsa/tag_brcm.c | 14 +- net/dsa/tag_dsa.c | 6 +- net/dsa/tag_gswip.c | 4 +- net/dsa/tag_hellcreek.c | 4 +- net/dsa/tag_ksz.c | 12 +- net/dsa/tag_lan9303.c | 4 +- net/dsa/tag_mtk.c | 4 +- net/dsa/tag_none.c | 6 +- net/dsa/tag_ocelot.c | 22 +- net/dsa/tag_ocelot_8021q.c | 12 +- net/dsa/tag_qca.c | 6 +- net/dsa/tag_rtl4_a.c | 6 +- net/dsa/tag_rtl8_4.c | 6 +- net/dsa/tag_rzn1_a5psw.c | 4 +- net/dsa/tag_sja1105.c | 30 +- net/dsa/tag_trailer.c | 4 +- net/dsa/tag_xrs700x.c | 4 +- net/dsa/user.c | 3727 ++++++++++++++++++++ net/dsa/user.h | 69 + 76 files changed, 4958 insertions(+), 4955 deletions(-) create mode 100644 net/dsa/conduit.c create mode 100644 net/dsa/conduit.h delete mode 100644 net/dsa/master.c delete mode 100644 net/dsa/master.h delete mode 100644 net/dsa/slave.c delete mode 100644 net/dsa/slave.h create mode 100644 net/dsa/user.c create mode 100644 net/dsa/user.h (limited to 'net') diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml index 5038818e9f2e..1c2444121e60 100644 --- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml +++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml @@ -60,7 +60,7 @@ 
description: | Check out example 6. - - Port 5 can be wired to an external phy. Port 5 becomes a DSA slave. + - Port 5 can be wired to an external phy. Port 5 becomes a DSA user port. For the multi-chip module MT7530, the external phy must be wired TX to TX to gmac1 of the SoC for this to work. Ubiquiti EdgeRouter X SFP is wired diff --git a/Documentation/networking/dsa/b53.rst b/Documentation/networking/dsa/b53.rst index b41637cdb82b..1cb3ff648f88 100644 --- a/Documentation/networking/dsa/b53.rst +++ b/Documentation/networking/dsa/b53.rst @@ -52,7 +52,7 @@ VLAN programming would basically change the CPU port's default PVID and make it untagged, undesirable. In difference to the configuration described in :ref:`dsa-vlan-configuration` -the default VLAN 1 has to be removed from the slave interface configuration in +the default VLAN 1 has to be removed from the user interface configuration in single port and gateway configuration, while there is no need to add an extra VLAN configuration in the bridge showcase. @@ -68,13 +68,13 @@ By default packages are tagged with vid 1: ip link add link eth0 name eth0.2 type vlan id 2 ip link add link eth0 name eth0.3 type vlan id 3 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up ip link set eth0.3 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -113,11 +113,11 @@ bridge # tag traffic on CPU port ip link add link eth0 name eth0.1 type vlan id 1 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. ip link set eth0 up ip link set eth0.1 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -149,12 +149,12 @@ gateway ip link add link eth0 name eth0.1 type vlan id 1 ip link add link eth0 name eth0.2 type vlan id 2 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up diff --git a/Documentation/networking/dsa/bcm_sf2.rst b/Documentation/networking/dsa/bcm_sf2.rst index dee234039e1e..d2571435696f 100644 --- a/Documentation/networking/dsa/bcm_sf2.rst +++ b/Documentation/networking/dsa/bcm_sf2.rst @@ -67,7 +67,7 @@ MDIO indirect accesses ---------------------- Due to a limitation in how Broadcom switches have been designed, external -Broadcom switches connected to a SF2 require the use of the DSA slave MDIO bus +Broadcom switches connected to a SF2 require the use of the DSA user MDIO bus in order to properly configure them. 
By default, the SF2 pseudo-PHY address, and an external switch pseudo-PHY address will both be snooping for incoming MDIO transactions, since they are at the same address (30), resulting in some kind of diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst index d2934c40f0f1..e6c9719874b0 100644 --- a/Documentation/networking/dsa/configuration.rst +++ b/Documentation/networking/dsa/configuration.rst @@ -31,38 +31,38 @@ at https://www.kernel.org/pub/linux/utils/net/iproute2/ Through DSA every port of a switch is handled like a normal linux Ethernet interface. The CPU port is the switch port connected to an Ethernet MAC chip. -The corresponding linux Ethernet interface is called the master interface. -All other corresponding linux interfaces are called slave interfaces. +The corresponding linux Ethernet interface is called the conduit interface. +All other corresponding linux interfaces are called user interfaces. -The slave interfaces depend on the master interface being up in order for them -to send or receive traffic. Prior to kernel v5.12, the state of the master +The user interfaces depend on the conduit interface being up in order for them +to send or receive traffic. Prior to kernel v5.12, the state of the conduit interface had to be managed explicitly by the user. Starting with kernel v5.12, the behavior is as follows: -- when a DSA slave interface is brought up, the master interface is +- when a DSA user interface is brought up, the conduit interface is automatically brought up. -- when the master interface is brought down, all DSA slave interfaces are +- when the conduit interface is brought down, all DSA user interfaces are automatically brought down. In this documentation the following Ethernet interfaces are used: *eth0* - the master interface + the conduit interface *eth1* - another master interface + another conduit interface *lan1* - a slave interface + a user interface *lan2* - another slave interface + another user interface *lan3* - a third slave interface + a third user interface *wan* - A slave interface dedicated for upstream traffic + A user interface dedicated for upstream traffic Further Ethernet interfaces can be configured similar. The configured IPs and networks are: @@ -96,11 +96,11 @@ without using a VLAN based configuration. ip addr add 192.0.2.5/30 dev lan2 ip addr add 192.0.2.9/30 dev lan3 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -108,11 +108,11 @@ without using a VLAN based configuration. *bridge* .. code-block:: sh - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -134,11 +134,11 @@ without using a VLAN based configuration. *gateway* .. code-block:: sh - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. 
+ # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -178,14 +178,14 @@ configuration. ip link add link eth0 name eth0.2 type vlan id 2 ip link add link eth0 name eth0.3 type vlan id 3 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up ip link set eth0.3 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -221,12 +221,12 @@ configuration. # tag traffic on CPU port ip link add link eth0 name eth0.1 type vlan id 1 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -261,13 +261,13 @@ configuration. ip link add link eth0 name eth0.1 type vlan id 1 ip link add link eth0 name eth0.2 type vlan id 2 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -380,22 +380,22 @@ affinities according to the available CPU ports. Secondly, it is possible to perform load balancing between CPU ports on a per packet basis, rather than statically assigning user ports to CPU ports. -This can be achieved by placing the DSA masters under a LAG interface (bonding +This can be achieved by placing the DSA conduits under a LAG interface (bonding or team). DSA monitors this operation and creates a mirror of this software LAG -on the CPU ports facing the physical DSA masters that constitute the LAG slave +on the CPU ports facing the physical DSA conduits that constitute the LAG slave devices. To make use of multiple CPU ports, the firmware (device tree) description of -the switch must mark all the links between CPU ports and their DSA masters +the switch must mark all the links between CPU ports and their DSA conduits using the ``ethernet`` reference/phandle. At startup, only a single CPU port -and DSA master will be used - the numerically first port from the firmware +and DSA conduit will be used - the numerically first port from the firmware description which has an ``ethernet`` property. It is up to the user to -configure the system for the switch to use other masters. +configure the system for the switch to use other conduits. DSA uses the ``rtnl_link_ops`` mechanism (with a "dsa" ``kind``) to allow -changing the DSA master of a user port. The ``IFLA_DSA_MASTER`` u32 netlink -attribute contains the ifindex of the master device that handles each slave -device. The DSA master must be a valid candidate based on firmware node +changing the DSA conduit of a user port. 
The ``IFLA_DSA_MASTER`` u32 netlink +attribute contains the ifindex of the conduit device that handles each user +device. The DSA conduit must be a valid candidate based on firmware node information, or a LAG interface which contains only slaves which are valid candidates. @@ -403,7 +403,7 @@ Using iproute2, the following manipulations are possible: .. code-block:: sh - # See the DSA master in current use + # See the DSA conduit in current use ip -d link show dev swp0 (...) dsa master eth0 @@ -414,7 +414,7 @@ Using iproute2, the following manipulations are possible: ip link set swp2 type dsa master eth1 ip link set swp3 type dsa master eth0 - # CPU ports in LAG, using explicit assignment of the DSA master + # CPU ports in LAG, using explicit assignment of the DSA conduit ip link add bond0 type bond mode balance-xor && ip link set bond0 up ip link set eth1 down && ip link set eth1 master bond0 ip link set swp0 type dsa master bond0 @@ -426,7 +426,7 @@ Using iproute2, the following manipulations are possible: (...) dsa master bond0 - # CPU ports in LAG, relying on implicit migration of the DSA master + # CPU ports in LAG, relying on implicit migration of the DSA conduit ip link add bond0 type bond mode balance-xor && ip link set bond0 up ip link set eth0 down && ip link set eth0 master bond0 ip link set eth1 down && ip link set eth1 master bond0 @@ -436,23 +436,23 @@ Using iproute2, the following manipulations are possible: Notice that in the case of CPU ports under a LAG, the use of the ``IFLA_DSA_MASTER`` netlink attribute is not strictly needed, but rather, DSA -reacts to the ``IFLA_MASTER`` attribute change of its present master (``eth0``) +reacts to the ``IFLA_MASTER`` attribute change of its present conduit (``eth0``) and migrates all user ports to the new upper of ``eth0``, ``bond0``. Similarly, when ``bond0`` is destroyed using ``RTM_DELLINK``, DSA migrates the user ports -that were assigned to this interface to the first physical DSA master which is +that were assigned to this interface to the first physical DSA conduit which is eligible, based on the firmware description (it effectively reverts to the startup configuration). In a setup with more than 2 physical CPU ports, it is therefore possible to mix -static user to CPU port assignment with LAG between DSA masters. It is not -possible to statically assign a user port towards a DSA master that has any -upper interfaces (this includes LAG devices - the master must always be the LAG +static user to CPU port assignment with LAG between DSA conduits. It is not +possible to statically assign a user port towards a DSA conduit that has any +upper interfaces (this includes LAG devices - the conduit must always be the LAG in this case). -Live changing of the DSA master (and thus CPU port) affinity of a user port is +Live changing of the DSA conduit (and thus CPU port) affinity of a user port is permitted, in order to allow dynamic redistribution in response to traffic. -Physical DSA masters are allowed to join and leave at any time a LAG interface -used as a DSA master; however, DSA will reject a LAG interface as a valid -candidate for being a DSA master unless it has at least one physical DSA master +Physical DSA conduits are allowed to join and leave at any time a LAG interface +used as a DSA conduit; however, DSA will reject a LAG interface as a valid +candidate for being a DSA conduit unless it has at least one physical DSA conduit as a slave device. 
diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index a94ddf83348a..7b2e69cd7ef0 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -25,7 +25,7 @@ presence of a management port connected to an Ethernet controller capable of receiving Ethernet frames from the switch. This is a very common setup for all kinds of Ethernet switches found in Small Home and Office products: routers, gateways, or even top-of-rack switches. This host Ethernet controller will -be later referred to as "master" and "cpu" in DSA terminology and code. +be later referred to as "conduit" and "cpu" in DSA terminology and code. The D in DSA stands for Distributed, because the subsystem has been designed with the ability to configure and manage cascaded switches on top of each other @@ -35,7 +35,7 @@ of multiple switches connected to each other is called a "switch tree". For each front-panel port, DSA creates specialized network devices which are used as controlling and data-flowing endpoints for use by the Linux networking -stack. These specialized network interfaces are referred to as "slave" network +stack. These specialized network interfaces are referred to as "user" network interfaces in DSA terminology and code. The ideal case for using DSA is when an Ethernet switch supports a "switch tag" @@ -56,12 +56,16 @@ Note that DSA does not currently create network interfaces for the "cpu" and - the "cpu" port is the Ethernet switch facing side of the management controller, and as such, would create a duplication of feature, since you - would get two interfaces for the same conduit: master netdev, and "cpu" netdev + would get two interfaces for the same conduit: conduit netdev, and "cpu" netdev - the "dsa" port(s) are just conduits between two or more switches, and as such cannot really be used as proper network interfaces either, only the downstream, or the top-most upstream interface makes sense with that model +NB: for the past 15 years, the DSA subsystem had been making use of the terms +"master" (rather than "conduit") and "slave" (rather than "user"). These terms +have been removed from the DSA codebase and phased out of the uAPI. + Switch tagging protocols ------------------------ @@ -80,14 +84,14 @@ methods of the ``struct dsa_device_ops`` structure, which are detailed below. Tagging protocols generally fall in one of three categories: 1. The switch-specific frame header is located before the Ethernet header, - shifting to the right (from the perspective of the DSA master's frame + shifting to the right (from the perspective of the DSA conduit's frame parser) the MAC DA, MAC SA, EtherType and the entire L2 payload. 2. The switch-specific frame header is located before the EtherType, keeping - the MAC DA and MAC SA in place from the DSA master's perspective, but + the MAC DA and MAC SA in place from the DSA conduit's perspective, but shifting the 'real' EtherType and L2 payload to the right. 3. The switch-specific frame header is located at the tail of the packet, keeping all frame headers in place and not altering the view of the packet - that the DSA master's frame parser has. + that the DSA conduit's frame parser has. A tagging protocol may tag all packets with switch tags of the same length, or the tag length might vary (for example packets with PTP timestamps might @@ -95,7 +99,7 @@ require an extended switch tag, or there might be one tag length on TX and a different one on RX). 
Either way, the tagging protocol driver must populate the ``struct dsa_device_ops::needed_headroom`` and/or ``struct dsa_device_ops::needed_tailroom`` with the length in octets of the longest switch frame header/trailer. The DSA -framework will automatically adjust the MTU of the master interface to +framework will automatically adjust the MTU of the conduit interface to accommodate for this extra size in order for DSA user ports to support the standard MTU (L2 payload length) of 1500 octets. The ``needed_headroom`` and ``needed_tailroom`` properties are also used to request from the network stack, @@ -140,18 +144,18 @@ adding or removing the ``ETH_P_EDSA`` EtherType and some padding octets). It is possible to construct cascaded setups of DSA switches even if their tagging protocols are not compatible with one another. In this case, there are no DSA links in this fabric, and each switch constitutes a disjoint DSA switch -tree. The DSA links are viewed as simply a pair of a DSA master (the out-facing +tree. The DSA links are viewed as simply a pair of a DSA conduit (the out-facing port of the upstream DSA switch) and a CPU port (the in-facing port of the downstream DSA switch). The tagging protocol of the attached DSA switch tree can be viewed through the -``dsa/tagging`` sysfs attribute of the DSA master:: +``dsa/tagging`` sysfs attribute of the DSA conduit:: cat /sys/class/net/eth0/dsa/tagging If the hardware and driver are capable, the tagging protocol of the DSA switch tree can be changed at runtime. This is done by writing the new tagging -protocol name to the same sysfs device attribute as above (the DSA master and +protocol name to the same sysfs device attribute as above (the DSA conduit and all attached switch ports must be down while doing this). It is desirable that all tagging protocols are testable with the ``dsa_loop`` @@ -159,7 +163,7 @@ mockup driver, which can be attached to any network interface. The goal is that any network interface should be capable of transmitting the same packet in the same way, and the tagger should decode the same received packet in the same way regardless of the driver used for the switch control path, and the driver used -for the DSA master. +for the DSA conduit. The transmission of a packet goes through the tagger's ``xmit`` function. The passed ``struct sk_buff *skb`` has ``skb->data`` pointing at @@ -183,44 +187,44 @@ virtual DSA user network interface corresponding to the physical front-facing switch port that the packet was received on. Since tagging protocols in category 1 and 2 break software (and most often also -hardware) packet dissection on the DSA master, features such as RPS (Receive -Packet Steering) on the DSA master would be broken. The DSA framework deals +hardware) packet dissection on the DSA conduit, features such as RPS (Receive +Packet Steering) on the DSA conduit would be broken. The DSA framework deals with this by hooking into the flow dissector and shifting the offset at which -the IP header is to be found in the tagged frame as seen by the DSA master. +the IP header is to be found in the tagged frame as seen by the DSA conduit. This behavior is automatic based on the ``overhead`` value of the tagging protocol. If not all packets are of equal size, the tagger can implement the ``flow_dissect`` method of the ``struct dsa_device_ops`` and override this default behavior by specifying the correct offset incurred by each individual RX packet. Tail taggers do not cause issues to the flow dissector. 
-Checksum offload should work with category 1 and 2 taggers when the DSA master +Checksum offload should work with category 1 and 2 taggers when the DSA conduit driver declares NETIF_F_HW_CSUM in vlan_features and looks at csum_start and csum_offset. For those cases, DSA will shift the checksum start and offset by -the tag size. If the DSA master driver still uses the legacy NETIF_F_IP_CSUM +the tag size. If the DSA conduit driver still uses the legacy NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM in vlan_features, the offload might only work if the offload hardware already expects that specific tag (perhaps due to matching -vendors). DSA slaves inherit those flags from the master port, and it is up to +vendors). DSA user ports inherit those flags from the conduit, and it is up to the driver to correctly fall back to software checksum when the IP header is not where the hardware expects. If that check is ineffective, the packets might go to the network without a proper checksum (the checksum field will have the pseudo IP header sum). For category 3, when the offload hardware does not already expect the switch tag in use, the checksum must be calculated before any -tag is inserted (i.e. inside the tagger). Otherwise, the DSA master would +tag is inserted (i.e. inside the tagger). Otherwise, the DSA conduit would include the tail tag in the (software or hardware) checksum calculation. Then, when the tag gets stripped by the switch during transmission, it will leave an incorrect IP checksum in place. Due to various reasons (most common being category 1 taggers being associated -with DSA-unaware masters, mangling what the master perceives as MAC DA), the -tagging protocol may require the DSA master to operate in promiscuous mode, to +with DSA-unaware conduits, mangling what the conduit perceives as MAC DA), the +tagging protocol may require the DSA conduit to operate in promiscuous mode, to receive all frames regardless of the value of the MAC DA. This can be done by -setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``. -Note that this assumes a DSA-unaware master driver, which is the norm. +setting the ``promisc_on_conduit`` property of the ``struct dsa_device_ops``. +Note that this assumes a DSA-unaware conduit driver, which is the norm. -Master network devices ----------------------- +Conduit network devices +----------------------- -Master network devices are regular, unmodified Linux network device drivers for +Conduit network devices are regular, unmodified Linux network device drivers for the CPU/management Ethernet interface. Such a driver might occasionally need to know whether DSA is enabled (e.g.: to enable/disable specific offload features), but the DSA subsystem has been proven to work with industry standard drivers: @@ -232,14 +236,14 @@ Ethernet switch. Networking stack hooks ---------------------- -When a master netdev is used with DSA, a small hook is placed in the +When a conduit netdev is used with DSA, a small hook is placed in the networking stack is in order to have the DSA subsystem process the Ethernet switch specific tagging protocol. DSA accomplishes this by registering a specific (and fake) Ethernet type (later becoming ``skb->protocol``) with the networking stack, this is also known as a ``ptype`` or ``packet_type``. A typical Ethernet Frame receive sequence looks like this: -Master network device (e.g.: e1000e): +Conduit network device (e.g.: e1000e): 1. 
Receive interrupt fires: @@ -269,16 +273,16 @@ Master network device (e.g.: e1000e): - inspect and strip switch tag protocol to determine originating port - locate per-port network device - - invoke ``eth_type_trans()`` with the DSA slave network device + - invoke ``eth_type_trans()`` with the DSA user network device - invoked ``netif_receive_skb()`` -Past this point, the DSA slave network devices get delivered regular Ethernet +Past this point, the DSA user network devices get delivered regular Ethernet frames that can be processed by the networking stack. -Slave network devices ---------------------- +User network devices +-------------------- -Slave network devices created by DSA are stacked on top of their master network +User network devices created by DSA are stacked on top of their conduit network device, each of these network interfaces will be responsible for being a controlling and data-flowing end-point for each front-panel port of the switch. These interfaces are specialized in order to: @@ -289,31 +293,31 @@ These interfaces are specialized in order to: Wake-on-LAN, register dumps... - manage external/internal PHY: link, auto-negotiation, etc. -These slave network devices have custom net_device_ops and ethtool_ops function +These user network devices have custom net_device_ops and ethtool_ops function pointers which allow DSA to introduce a level of layering between the networking stack/ethtool and the switch driver implementation. -Upon frame transmission from these slave network devices, DSA will look up which +Upon frame transmission from these user network devices, DSA will look up which switch tagging protocol is currently registered with these network devices and invoke a specific transmit routine which takes care of adding the relevant switch tag in the Ethernet frames. -These frames are then queued for transmission using the master network device +These frames are then queued for transmission using the conduit network device ``ndo_start_xmit()`` function. Since they contain the appropriate switch tag, the Ethernet switch will be able to process these incoming frames from the management interface and deliver them to the physical switch port. When using multiple CPU ports, it is possible to stack a LAG (bonding/team) -device between the DSA slave devices and the physical DSA masters. The LAG -device is thus also a DSA master, but the LAG slave devices continue to be DSA -masters as well (just with no user port assigned to them; this is needed for -recovery in case the LAG DSA master disappears). Thus, the data path of the LAG -DSA master is used asymmetrically. On RX, the ``ETH_P_XDSA`` handler, which -calls ``dsa_switch_rcv()``, is invoked early (on the physical DSA master; -LAG slave). Therefore, the RX data path of the LAG DSA master is not used. -On the other hand, TX takes place linearly: ``dsa_slave_xmit`` calls -``dsa_enqueue_skb``, which calls ``dev_queue_xmit`` towards the LAG DSA master. -The latter calls ``dev_queue_xmit`` towards one physical DSA master or the +device between the DSA user devices and the physical DSA conduits. The LAG +device is thus also a DSA conduit, but the LAG slave devices continue to be DSA +conduits as well (just with no user port assigned to them; this is needed for +recovery in case the LAG DSA conduit disappears). Thus, the data path of the LAG +DSA conduit is used asymmetrically. On RX, the ``ETH_P_XDSA`` handler, which +calls ``dsa_switch_rcv()``, is invoked early (on the physical DSA conduit; +LAG slave). 
Therefore, the RX data path of the LAG DSA conduit is not used. +On the other hand, TX takes place linearly: ``dsa_user_xmit`` calls +``dsa_enqueue_skb``, which calls ``dev_queue_xmit`` towards the LAG DSA conduit. +The latter calls ``dev_queue_xmit`` towards one physical DSA conduit or the other, and in both cases, the packet exits the system through a hardware path towards the switch. @@ -352,11 +356,11 @@ perspective:: || swp0 | | swp1 | | swp2 | | swp3 || ++------+-+------+-+------+-+------++ -Slave MDIO bus -------------- +User MDIO bus +------------- -In order to be able to read to/from a switch PHY built into it, DSA creates a -slave MDIO bus which allows a specific switch driver to divert and intercept +In order to be able to read to/from a switch PHY built into it, DSA creates a +user MDIO bus which allows a specific switch driver to divert and intercept MDIO reads/writes towards specific PHY addresses. In most MDIO-connected switches, these functions would utilize direct or indirect PHY addressing mode to return standard MII registers from the switch builtin PHYs, allowing the PHY @@ -364,7 +368,7 @@ library and/or to return link status, link partner pages, auto-negotiation results, etc. For Ethernet switches which have both external and internal MDIO buses, the -slave MII bus can be utilized to mux/demux MDIO reads and writes towards either +user MII bus can be utilized to mux/demux MDIO reads and writes towards either internal or external MDIO devices this switch might be connected to: internal PHYs, external PHYs, or even external switches. @@ -381,10 +385,10 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as - ``dsa_platform_data``: platform device configuration data which can reference a collection of dsa_chip_data structures if multiple switches are cascaded, - the master network device this switch tree is attached to needs to be + the conduit network device this switch tree is attached to needs to be referenced -- ``dsa_switch_tree``: structure assigned to the master network device under +- ``dsa_switch_tree``: structure assigned to the conduit network device under ``dsa_ptr``, this structure references a dsa_platform_data structure as well as the tagging protocol supported by the switch tree, and which receive/transmit function hooks should be invoked, information about the directly attached @@ -392,7 +396,7 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as referenced to address individual switches in the tree. - ``dsa_switch``: structure describing a switch device in the tree, referencing - a ``dsa_switch_tree`` as a backpointer, slave network devices, master network + a ``dsa_switch_tree`` as a backpointer, user network devices, conduit network device, and a reference to the backing ``dsa_switch_ops`` - ``dsa_switch_ops``: structure referencing function pointers, see below for a @@ -404,7 +408,7 @@ Design limitations Lack of CPU/DSA network devices ------------------------------- -DSA does not currently create slave network devices for the CPU or DSA ports, as +DSA does not currently create user network devices for the CPU or DSA ports, as described before.
This might be an issue in the following cases: Common pitfalls using DSA setups -------------------------------- -Once a master network device is configured to use DSA (dev->dsa_ptr becomes +Once a conduit network device is configured to use DSA (dev->dsa_ptr becomes non-NULL), and the switch behind it expects a tagging protocol, this network interface can only exclusively be used as a conduit interface. Sending packets directly through this interface (e.g.: opening a socket using this interface) @@ -440,7 +444,7 @@ DSA currently leverages the following subsystems: MDIO/PHY library ---------------- -Slave network devices exposed by DSA may or may not be interfacing with PHY +User network devices exposed by DSA may or may not be interfacing with PHY devices (``struct phy_device`` as defined in ``include/linux/phy.h)``, but the DSA subsystem deals with all possible combinations: @@ -450,7 +454,7 @@ subsystem deals with all possible combinations: - special, non-autonegotiated or non MDIO-managed PHY devices: SFPs, MoCA; a.k.a fixed PHYs -The PHY configuration is done by the ``dsa_slave_phy_setup()`` function and the +The PHY configuration is done by the ``dsa_user_phy_setup()`` function and the logic basically looks like this: - if Device Tree is used, the PHY device is looked up using the standard @@ -463,7 +467,7 @@ logic basically looks like this: and connected transparently using the special fixed MDIO bus driver - finally, if the PHY is built into the switch, as is very common with - standalone switch packages, the PHY is probed using the slave MII bus created + standalone switch packages, the PHY is probed using the user MII bus created by DSA @@ -472,7 +476,7 @@ SWITCHDEV DSA directly utilizes SWITCHDEV when interfacing with the bridge layer, and more specifically with its VLAN filtering portion when configuring VLANs on top -of per-port slave network devices. As of today, the only SWITCHDEV objects +of per-port user network devices. As of today, the only SWITCHDEV objects supported by DSA are the FDB and VLAN objects. Devlink @@ -589,8 +593,8 @@ is torn down when the first switch unregisters. It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal version of the full teardown performed by ``dsa_unregister_switch()``). -The reason is that DSA keeps a reference on the master net device, and if the -driver for the master device decides to unbind on shutdown, DSA's reference +The reason is that DSA keeps a reference on the conduit net device, and if the +driver for the conduit device decides to unbind on shutdown, DSA's reference will block that operation from finalizing. Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called, @@ -615,7 +619,7 @@ Switch configuration tag formats. - ``change_tag_protocol``: when the default tagging protocol has compatibility - problems with the master or other issues, the driver may support changing it + problems with the conduit or other issues, the driver may support changing it at runtime, either through a device tree property or through sysfs. In that case, further calls to ``get_tag_protocol`` should report the protocol in current use. @@ -643,22 +647,22 @@ Switch configuration PHY cannot be found. In this case, probing of the DSA switch continues without that particular port. 
-- ``port_change_master``: method through which the affinity (association used +- ``port_change_conduit``: method through which the affinity (association used for traffic termination purposes) between a user port and a CPU port can be changed. By default all user ports from a tree are assigned to the first available CPU port that makes sense for them (most of the times this means the user ports of a tree are all assigned to the same CPU port, except for H topologies as described in commit 2c0b03258b8b). The ``port`` argument - represents the index of the user port, and the ``master`` argument represents - the new DSA master ``net_device``. The CPU port associated with the new - master can be retrieved by looking at ``struct dsa_port *cpu_dp = - master->dsa_ptr``. Additionally, the master can also be a LAG device where - all the slave devices are physical DSA masters. LAG DSA masters also have a - valid ``master->dsa_ptr`` pointer, however this is not unique, but rather a - duplicate of the first physical DSA master's (LAG slave) ``dsa_ptr``. In case - of a LAG DSA master, a further call to ``port_lag_join`` will be emitted + represents the index of the user port, and the ``conduit`` argument represents + the new DSA conduit ``net_device``. The CPU port associated with the new + conduit can be retrieved by looking at ``struct dsa_port *cpu_dp = + conduit->dsa_ptr``. Additionally, the conduit can also be a LAG device where + all the slave devices are physical DSA conduits. LAG DSA conduits also have a + valid ``conduit->dsa_ptr`` pointer, however this is not unique, but rather a + duplicate of the first physical DSA conduit's (LAG slave) ``dsa_ptr``. In case + of a LAG DSA conduit, a further call to ``port_lag_join`` will be emitted separately for the physical CPU ports associated with the physical DSA - masters, requesting them to create a hardware LAG associated with the LAG + conduits, requesting them to create a hardware LAG associated with the LAG interface. PHY devices and link management @@ -670,16 +674,16 @@ PHY devices and link management should return a 32-bit bitmask of "flags" that is private between the switch driver and the Ethernet PHY driver in ``drivers/net/phy/\*``. -- ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read +- ``phy_read``: Function invoked by the DSA user MDIO bus when attempting to read the switch port MDIO registers. If unavailable, return 0xffff for each read. For builtin switch Ethernet PHYs, this function should allow reading the link status, auto-negotiation results, link partner pages, etc. -- ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write +- ``phy_write``: Function invoked by the DSA user MDIO bus when attempting to write to the switch port MDIO registers. If unavailable return a negative error code. -- ``adjust_link``: Function invoked by the PHY library when a slave network device +- ``adjust_link``: Function invoked by the PHY library when a user network device is attached to a PHY device. This function is responsible for appropriately configuring the switch port link parameters: speed, duplex, pause based on what the ``phy_device`` is providing. @@ -698,14 +702,14 @@ Ethtool operations typically return statistics strings, private flags strings, etc. - ``get_ethtool_stats``: ethtool function used to query per-port statistics and - return their values. DSA overlays slave network devices general statistics: + return their values.
DSA overlays user network devices general statistics: RX/TX counters from the network device, with switch driver specific statistics per port - ``get_sset_count``: ethtool function used to query the number of statistics items - ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this - function may for certain implementations also query the master network device + function may for certain implementations also query the conduit network device Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN - ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port, @@ -747,13 +751,13 @@ Power management should resume all Ethernet switch activities and re-configure the switch to be in a fully active state -- ``port_enable``: function invoked by the DSA slave network device ndo_open +- ``port_enable``: function invoked by the DSA user network device ndo_open function when a port is administratively brought up, this function should fully enable a given switch port. DSA takes care of marking the port with ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it was not, and propagating these changes down to the hardware -- ``port_disable``: function invoked by the DSA slave network device ndo_close +- ``port_disable``: function invoked by the DSA user network device ndo_close function when a port is administratively brought down, this function should fully disable a given switch port. DSA takes care of marking the port with ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is diff --git a/Documentation/networking/dsa/lan9303.rst b/Documentation/networking/dsa/lan9303.rst index e3c820db28ad..ab81b4e0139e 100644 --- a/Documentation/networking/dsa/lan9303.rst +++ b/Documentation/networking/dsa/lan9303.rst @@ -4,7 +4,7 @@ LAN9303 Ethernet switch driver The LAN9303 is a three port 10/100 Mbps ethernet switch with integrated phys for the two external ethernet ports. The third port is an RMII/MII interface to a -host master network interface (e.g. fixed link). +host conduit network interface (e.g. fixed link). Driver details diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index e0219c1452ab..8ab60eef07d4 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -79,7 +79,7 @@ The hardware tags all traffic internally with a port-based VLAN (pvid), or it decodes the VLAN information from the 802.1Q tag. Advanced VLAN classification is not possible. Once attributed a VLAN tag, frames are checked against the port's membership rules and dropped at ingress if they don't match any VLAN. -This behavior is available when switch ports are enslaved to a bridge with +This behavior is available when switch ports join a bridge with ``vlan_filtering 1``. Normally the hardware is not configurable with respect to VLAN awareness, but @@ -122,7 +122,7 @@ on egress. Using ``vlan_filtering=1``, the behavior is the other way around: offloaded flows can be steered to TX queues based on the VLAN PCP, but the DSA net devices are no longer able to do that. To inject frames into a hardware TX queue with VLAN awareness active, it is necessary to create a VLAN -sub-interface on the DSA master port, and send normal (0x8100) VLAN-tagged +sub-interface on the DSA conduit port, and send normal (0x8100) VLAN-tagged towards the switch, with the VLAN PCP bits set appropriately. 
Management traffic (having DMAC 01-80-C2-xx-xx-xx or 01-19-1B-xx-xx-xx) is the @@ -389,7 +389,7 @@ MDIO bus and PHY management The SJA1105 does not have an MDIO bus and does not perform in-band AN either. Therefore there is no link state notification coming from the switch device. A board would need to hook up the PHYs connected to the switch to any other -MDIO bus available to Linux within the system (e.g. to the DSA master's MDIO +MDIO bus available to Linux within the system (e.g. to the DSA conduit's MDIO bus). Link state management then works by the driver manually keeping in sync (over SPI commands) the MAC link speed with the settings negotiated by the PHY. diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi index 5fc613d24151..49cbdb55b4b3 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi @@ -13,7 +13,7 @@ / { aliases { ethernet0 = ð0; - /* for dsa slave device */ + /* for DSA user port device */ ethernet1 = &switch0port1; ethernet2 = &switch0port2; ethernet3 = &switch0port3; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4e27dc913cf7..0d628b35fd5c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -757,7 +757,7 @@ int b53_configure_vlan(struct dsa_switch *ds) /* Create an untagged VLAN entry for the default PVID in case * CONFIG_VLAN_8021Q is disabled and there are no calls to - * dsa_slave_vlan_rx_add_vid() to create the default VLAN + * dsa_user_vlan_rx_add_vid() to create the default VLAN * entry. Do this only when the tagging protocol is not * DSA_TAG_PROTO_NONE */ @@ -958,7 +958,7 @@ static struct phy_device *b53_get_phy_device(struct dsa_switch *ds, int port) return NULL; } - return mdiobus_get_phy(ds->slave_mii_bus, port); + return mdiobus_get_phy(ds->user_mii_bus, port); } void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index 4d55d8d18376..897e5e8b3d69 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -329,7 +329,7 @@ static int b53_mdio_probe(struct mdio_device *mdiodev) * layer setup */ if (of_machine_is_compatible("brcm,bcm7445d0") && - strcmp(mdiodev->bus->name, "sf2 slave mii")) + strcmp(mdiodev->bus->name, "sf2 user mii")) return -EPROBE_DEFER; dev = b53_switch_alloc(&mdiodev->dev, &b53_mdio_ops, mdiodev->bus); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 326937e91f52..cadee5505c29 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -623,19 +623,19 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) priv->master_mii_dn = dn; - priv->slave_mii_bus = mdiobus_alloc(); - if (!priv->slave_mii_bus) { + priv->user_mii_bus = mdiobus_alloc(); + if (!priv->user_mii_bus) { err = -ENOMEM; goto err_put_master_mii_bus_dev; } - priv->slave_mii_bus->priv = priv; - priv->slave_mii_bus->name = "sf2 slave mii"; - priv->slave_mii_bus->read = bcm_sf2_sw_mdio_read; - priv->slave_mii_bus->write = bcm_sf2_sw_mdio_write; - snprintf(priv->slave_mii_bus->id, MII_BUS_ID_SIZE, "sf2-%d", + priv->user_mii_bus->priv = priv; + priv->user_mii_bus->name = "sf2 user mii"; + priv->user_mii_bus->read = bcm_sf2_sw_mdio_read; + priv->user_mii_bus->write = bcm_sf2_sw_mdio_write; + snprintf(priv->user_mii_bus->id, MII_BUS_ID_SIZE, "sf2-%d", index++); - priv->slave_mii_bus->dev.of_node = dn; 
+ priv->user_mii_bus->dev.of_node = dn; /* Include the pseudo-PHY address to divert reads towards our * workaround. This is only required for 7445D0, since 7445E0 @@ -653,9 +653,9 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) priv->indir_phy_mask = 0; ds->phys_mii_mask = priv->indir_phy_mask; - ds->slave_mii_bus = priv->slave_mii_bus; - priv->slave_mii_bus->parent = ds->dev->parent; - priv->slave_mii_bus->phy_mask = ~priv->indir_phy_mask; + ds->user_mii_bus = priv->user_mii_bus; + priv->user_mii_bus->parent = ds->dev->parent; + priv->user_mii_bus->phy_mask = ~priv->indir_phy_mask; /* We need to make sure that of_phy_connect() will not work by * removing the 'phandle' and 'linux,phandle' properties and @@ -682,14 +682,14 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) phy_device_remove(phydev); } - err = mdiobus_register(priv->slave_mii_bus); + err = mdiobus_register(priv->user_mii_bus); if (err && dn) - goto err_free_slave_mii_bus; + goto err_free_user_mii_bus; return 0; -err_free_slave_mii_bus: - mdiobus_free(priv->slave_mii_bus); +err_free_user_mii_bus: + mdiobus_free(priv->user_mii_bus); err_put_master_mii_bus_dev: put_device(&priv->master_mii_bus->dev); err_of_node_put: @@ -699,10 +699,9 @@ err_of_node_put: static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { - mdiobus_unregister(priv->slave_mii_bus); - mdiobus_free(priv->slave_mii_bus); + mdiobus_unregister(priv->user_mii_bus); + mdiobus_free(priv->user_mii_bus); put_device(&priv->master_mii_bus->dev); - of_node_put(priv->master_mii_dn); } static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) @@ -915,7 +914,7 @@ static void bcm_sf2_sw_fixed_state(struct dsa_switch *ds, int port, * state machine and make it go in PHY_FORCING state instead. */ if (!status->link) - netif_carrier_off(dsa_to_port(ds, port)->slave); + netif_carrier_off(dsa_to_port(ds, port)->user); status->duplex = DUPLEX_FULL; } else { status->link = true; @@ -989,7 +988,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); + struct net_device *p = dsa_port_to_conduit(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol = { }; @@ -1013,7 +1012,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); + struct net_device *p = dsa_port_to_conduit(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; struct ethtool_wolinfo pwol = { }; diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 00afc94ce522..424f896b5a6f 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -108,7 +108,7 @@ struct bcm_sf2_priv { /* Master and slave MDIO bus controller */ unsigned int indir_phy_mask; struct device_node *master_mii_dn; - struct mii_bus *slave_mii_bus; + struct mii_bus *user_mii_bus; struct mii_bus *master_mii_bus; /* Bitmask of ports needing BRCM tags */ diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index c4010b7bf089..c88ee3dd4299 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -1102,7 +1102,7 @@ static int bcm_sf2_cfp_rule_get_all(struct bcm_sf2_priv *priv, int 
bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc, u32 *rule_locs) { - struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); + struct net_device *p = dsa_port_to_conduit(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; @@ -1145,7 +1145,7 @@ int bcm_sf2_get_rxnfc(struct dsa_switch *ds, int port, int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc) { - struct net_device *p = dsa_port_to_master(dsa_to_port(ds, port)); + struct net_device *p = dsa_port_to_conduit(dsa_to_port(ds, port)); struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); int ret = 0; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index ee67adeb2cdb..fcb20eac332a 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1084,7 +1084,7 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port, if (!dsa_port_is_user(dp)) return 0; - vlan_vid_add(dsa_port_to_master(dp), htons(ETH_P_8021Q), port); + vlan_vid_add(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); return lan9303_enable_processing_port(chip, port); } @@ -1097,7 +1097,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - vlan_vid_del(dsa_port_to_master(dp), htons(ETH_P_8021Q), port); + vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); lan9303_disable_processing_port(chip, port); lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 1a2d5797bf98..9c185c9f0963 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -510,22 +510,22 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) struct dsa_switch *ds = priv->ds; int err; - ds->slave_mii_bus = mdiobus_alloc(); - if (!ds->slave_mii_bus) + ds->user_mii_bus = mdiobus_alloc(); + if (!ds->user_mii_bus) return -ENOMEM; - ds->slave_mii_bus->priv = priv; - ds->slave_mii_bus->read = gswip_mdio_rd; - ds->slave_mii_bus->write = gswip_mdio_wr; - ds->slave_mii_bus->name = "lantiq,xrx200-mdio"; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s-mii", + ds->user_mii_bus->priv = priv; + ds->user_mii_bus->read = gswip_mdio_rd; + ds->user_mii_bus->write = gswip_mdio_wr; + ds->user_mii_bus->name = "lantiq,xrx200-mdio"; + snprintf(ds->user_mii_bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(priv->dev)); - ds->slave_mii_bus->parent = priv->dev; - ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; + ds->user_mii_bus->parent = priv->dev; + ds->user_mii_bus->phy_mask = ~ds->phys_mii_mask; - err = of_mdiobus_register(ds->slave_mii_bus, mdio_np); + err = of_mdiobus_register(ds->user_mii_bus, mdio_np); if (err) - mdiobus_free(ds->slave_mii_bus); + mdiobus_free(ds->user_mii_bus); return err; } @@ -2196,8 +2196,8 @@ disable_switch: dsa_unregister_switch(priv->ds); mdio_bus: if (mdio_np) { - mdiobus_unregister(priv->ds->slave_mii_bus); - mdiobus_free(priv->ds->slave_mii_bus); + mdiobus_unregister(priv->ds->user_mii_bus); + mdiobus_free(priv->ds->user_mii_bus); } put_mdio_node: of_node_put(mdio_np); @@ -2219,10 +2219,10 @@ static void gswip_remove(struct platform_device *pdev) dsa_unregister_switch(priv->ds); - if (priv->ds->slave_mii_bus) { - mdiobus_unregister(priv->ds->slave_mii_bus); - of_node_put(priv->ds->slave_mii_bus->dev.of_node); - mdiobus_free(priv->ds->slave_mii_bus); + if (priv->ds->user_mii_bus) { + mdiobus_unregister(priv->ds->user_mii_bus); + 
of_node_put(priv->ds->user_mii_bus->dev.of_node); + mdiobus_free(priv->ds->user_mii_bus); } for (i = 0; i < priv->num_gphy_fw; i++) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index cde8ef33d029..a8b0e528b804 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1170,7 +1170,7 @@ int ksz9477_tc_cbs_set_cinc(struct ksz_device *dev, int port, u32 val) void ksz9477_hsr_join(struct dsa_switch *ds, int port, struct net_device *hsr) { struct ksz_device *dev = ds->priv; - struct net_device *slave; + struct net_device *user; struct dsa_port *hsr_dp; u8 data, hsr_ports = 0; @@ -1202,8 +1202,8 @@ void ksz9477_hsr_join(struct dsa_switch *ds, int port, struct net_device *hsr) ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL, PORT_SRC_ADDR_FILTER, true); /* Setup HW supported features for lan HSR ports */ - slave = dsa_to_port(ds, port)->slave; - slave->features |= KSZ9477_SUPPORTED_HSR_FEATURES; + user = dsa_to_port(ds, port)->user; + user->features |= KSZ9477_SUPPORTED_HSR_FEATURES; } void ksz9477_hsr_leave(struct dsa_switch *ds, int port, struct net_device *hsr) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b800ace40ce1..a8025ff988ec 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1945,14 +1945,14 @@ static int ksz_irq_phy_setup(struct ksz_device *dev) ret = irq; goto out; } - ds->slave_mii_bus->irq[phy] = irq; + ds->user_mii_bus->irq[phy] = irq; } } return 0; out: while (phy--) if (BIT(phy) & ds->phys_mii_mask) - irq_dispose_mapping(ds->slave_mii_bus->irq[phy]); + irq_dispose_mapping(ds->user_mii_bus->irq[phy]); return ret; } @@ -1964,7 +1964,7 @@ static void ksz_irq_phy_free(struct ksz_device *dev) for (phy = 0; phy < KSZ_MAX_NUM_PORTS; phy++) if (BIT(phy) & ds->phys_mii_mask) - irq_dispose_mapping(ds->slave_mii_bus->irq[phy]); + irq_dispose_mapping(ds->user_mii_bus->irq[phy]); } static int ksz_mdio_register(struct ksz_device *dev) @@ -1987,12 +1987,12 @@ static int ksz_mdio_register(struct ksz_device *dev) bus->priv = dev; bus->read = ksz_sw_mdio_read; bus->write = ksz_sw_mdio_write; - bus->name = "ksz slave smi"; + bus->name = "ksz user smi"; snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index); bus->parent = ds->dev; bus->phy_mask = ~ds->phys_mii_mask; - ds->slave_mii_bus = bus; + ds->user_mii_bus = bus; if (dev->irq > 0) { ret = ksz_irq_phy_setup(dev); @@ -2344,7 +2344,7 @@ static void ksz_mib_read_work(struct work_struct *work) if (!p->read) { const struct dsa_port *dp = dsa_to_port(dev->ds, i); - if (!netif_carrier_ok(dp->slave)) + if (!netif_carrier_ok(dp->user)) mib->cnt_ptr = dev->info->reg_mib_cnt; } port_r_cnt(dev, i); @@ -2464,7 +2464,7 @@ static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, mutex_lock(&mib->cnt_mutex); /* Only read dropped counters if no link. 
*/ - if (!netif_carrier_ok(dp->slave)) + if (!netif_carrier_ok(dp->user)) mib->cnt_ptr = dev->info->reg_mib_cnt; port_r_cnt(dev, port); memcpy(buf, mib->counters, dev->info->mib_cnt * sizeof(u64)); @@ -2574,7 +2574,7 @@ static int ksz_port_setup(struct dsa_switch *ds, int port) if (!dsa_is_user_port(ds, port)) return 0; - /* setup slave port */ + /* setup user port */ dev->dev_ops->port_setup(dev, port, false); /* port_stp_state_set() will be called after to enable the port so @@ -3567,8 +3567,8 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, static int ksz_switch_macaddr_get(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { - struct net_device *slave = dsa_to_port(ds, port)->slave; - const unsigned char *addr = slave->dev_addr; + struct net_device *user = dsa_to_port(ds, port)->user; + const unsigned char *addr = user->dev_addr; struct ksz_switch_macaddr *switch_macaddr; struct ksz_device *dev = ds->priv; const u16 *regs = dev->info->regs; diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index 4e22a695a64c..1fe105913c75 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -557,7 +557,7 @@ static void ksz_ptp_txtstamp_skb(struct ksz_device *dev, struct skb_shared_hwtstamps hwtstamps = {}; int ret; - /* timeout must include DSA master to transmit data, tstamp latency, + /* timeout must include DSA conduit to transmit data, tstamp latency, * IRQ latency and time for reading the time stamp. */ ret = wait_for_completion_timeout(&prt->tstamp_msg_comp, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index ecf5d3deb36e..d27c6b70a2f6 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1113,7 +1113,7 @@ mt7530_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) u32 val; /* When a new MTU is set, DSA always set the CPU port's MTU to the - * largest MTU of the slave ports. Because the switch only has a global + * largest MTU of the user ports. Because the switch only has a global * RX length register, only allowing CPU port here is enough. */ if (!dsa_is_cpu_port(ds, port)) @@ -2069,7 +2069,7 @@ mt7530_setup_mdio_irq(struct mt7530_priv *priv) unsigned int irq; irq = irq_create_mapping(priv->irq_domain, p); - ds->slave_mii_bus->irq[p] = irq; + ds->user_mii_bus->irq[p] = irq; } } } @@ -2163,7 +2163,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv) if (!bus) return -ENOMEM; - ds->slave_mii_bus = bus; + ds->user_mii_bus = bus; bus->priv = priv; bus->name = KBUILD_MODNAME "-mii"; snprintf(bus->id, MII_BUS_ID_SIZE, KBUILD_MODNAME "-%d", idx++); @@ -2200,20 +2200,20 @@ mt7530_setup(struct dsa_switch *ds) u32 id, val; int ret, i; - /* The parent node of master netdev which holds the common system + /* The parent node of conduit netdev which holds the common system * controller also is the container for two GMACs nodes representing * as two netdev instances. 
*/ dsa_switch_for_each_cpu_port(cpu_dp, ds) { - dn = cpu_dp->master->dev.of_node->parent; + dn = cpu_dp->conduit->dev.of_node->parent; /* It doesn't matter which CPU port is found first, - * their masters should share the same parent OF node + * their conduits should share the same parent OF node */ break; } if (!dn) { - dev_err(ds->dev, "parent OF node of DSA master not found"); + dev_err(ds->dev, "parent OF node of DSA conduit not found"); return -EINVAL; } @@ -2488,7 +2488,7 @@ mt7531_setup(struct dsa_switch *ds) if (mt7531_dual_sgmii_supported(priv)) { priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII; - /* Let ds->slave_mii_bus be able to access external phy. */ + /* Let ds->user_mii_bus be able to access external phy. */ mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO11_RG_RXD2_MASK, MT7531_EXT_P_MDC_11); mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO12_RG_RXD3_MASK, @@ -2717,7 +2717,7 @@ mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode, case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: dp = dsa_to_port(ds, port); - phydev = dp->slave->phydev; + phydev = dp->user->phydev; return mt7531_rgmii_setup(priv, port, interface, phydev); case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_NA: diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ab434a77b059..42b1acaca33a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2486,7 +2486,7 @@ static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, else member = MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_TAGGED; - /* net/dsa/slave.c will call dsa_port_vlan_add() for the affected port + /* net/dsa/user.c will call dsa_port_vlan_add() for the affected port * and then the CPU port. Do not warn for duplicates for the CPU port. */ warn = !dsa_is_cpu_port(ds, port) && !dsa_is_dsa_port(ds, port); @@ -3719,7 +3719,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) return err; chip->ds = ds; - ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + ds->user_mii_bus = mv88e6xxx_default_mdio_bus(chip); /* Since virtual bridges are mapped in the PVT, the number we support * depends on the physical switch topology. We need to let DSA figure diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 9a3e5ec16972..61e95487732d 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -42,22 +42,22 @@ static struct net_device *felix_classify_db(struct dsa_db db) } } -static int felix_cpu_port_for_master(struct dsa_switch *ds, - struct net_device *master) +static int felix_cpu_port_for_conduit(struct dsa_switch *ds, + struct net_device *conduit) { struct ocelot *ocelot = ds->priv; struct dsa_port *cpu_dp; int lag; - if (netif_is_lag_master(master)) { + if (netif_is_lag_master(conduit)) { mutex_lock(&ocelot->fwd_domain_lock); - lag = ocelot_bond_get_id(ocelot, master); + lag = ocelot_bond_get_id(ocelot, conduit); mutex_unlock(&ocelot->fwd_domain_lock); return lag; } - cpu_dp = master->dsa_ptr; + cpu_dp = conduit->dsa_ptr; return cpu_dp->index; } @@ -366,7 +366,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds, * is the mode through which frames can be injected from and extracted to an * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU * running Linux, and this forms a DSA setup together with the enetc or fman - * DSA master. + * DSA conduit. 
*/ static void felix_npi_port_init(struct ocelot *ocelot, int port) { @@ -441,16 +441,16 @@ static unsigned long felix_tag_npi_get_host_fwd_mask(struct dsa_switch *ds) return BIT(ocelot->num_phys_ports); } -static int felix_tag_npi_change_master(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack) +static int felix_tag_npi_change_conduit(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port), *other_dp; struct ocelot *ocelot = ds->priv; - if (netif_is_lag_master(master)) { + if (netif_is_lag_master(conduit)) { NL_SET_ERR_MSG_MOD(extack, - "LAG DSA master only supported using ocelot-8021q"); + "LAG DSA conduit only supported using ocelot-8021q"); return -EOPNOTSUPP; } @@ -459,24 +459,24 @@ static int felix_tag_npi_change_master(struct dsa_switch *ds, int port, * come back up until they're all changed to the new one. */ dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = other_dp->slave; + struct net_device *user = other_dp->user; - if (other_dp != dp && (slave->flags & IFF_UP) && - dsa_port_to_master(other_dp) != master) { + if (other_dp != dp && (user->flags & IFF_UP) && + dsa_port_to_conduit(other_dp) != conduit) { NL_SET_ERR_MSG_MOD(extack, - "Cannot change while old master still has users"); + "Cannot change while old conduit still has users"); return -EOPNOTSUPP; } } felix_npi_port_deinit(ocelot, ocelot->npi); - felix_npi_port_init(ocelot, felix_cpu_port_for_master(ds, master)); + felix_npi_port_init(ocelot, felix_cpu_port_for_conduit(ds, conduit)); return 0; } /* Alternatively to using the NPI functionality, that same hardware MAC - * connected internally to the enetc or fman DSA master can be configured to + * connected internally to the enetc or fman DSA conduit can be configured to * use the software-defined tag_8021q frame format. 
As far as the hardware is * concerned, it thinks it is a "dumb switch" - the queues of the CPU port * module are now disconnected from it, but can still be accessed through @@ -486,7 +486,7 @@ static const struct felix_tag_proto_ops felix_tag_npi_proto_ops = { .setup = felix_tag_npi_setup, .teardown = felix_tag_npi_teardown, .get_host_fwd_mask = felix_tag_npi_get_host_fwd_mask, - .change_master = felix_tag_npi_change_master, + .change_conduit = felix_tag_npi_change_conduit, }; static int felix_tag_8021q_setup(struct dsa_switch *ds) @@ -561,11 +561,11 @@ static unsigned long felix_tag_8021q_get_host_fwd_mask(struct dsa_switch *ds) return dsa_cpu_ports(ds); } -static int felix_tag_8021q_change_master(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack) +static int felix_tag_8021q_change_conduit(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack) { - int cpu = felix_cpu_port_for_master(ds, master); + int cpu = felix_cpu_port_for_conduit(ds, conduit); struct ocelot *ocelot = ds->priv; ocelot_port_unassign_dsa_8021q_cpu(ocelot, port); @@ -578,7 +578,7 @@ static const struct felix_tag_proto_ops felix_tag_8021q_proto_ops = { .setup = felix_tag_8021q_setup, .teardown = felix_tag_8021q_teardown, .get_host_fwd_mask = felix_tag_8021q_get_host_fwd_mask, - .change_master = felix_tag_8021q_change_master, + .change_conduit = felix_tag_8021q_change_conduit, }; static void felix_set_host_flood(struct dsa_switch *ds, unsigned long mask, @@ -741,14 +741,14 @@ static void felix_port_set_host_flood(struct dsa_switch *ds, int port, !!felix->host_flood_mc_mask, true); } -static int felix_port_change_master(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack) +static int felix_port_change_conduit(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); - return felix->tag_proto_ops->change_master(ds, port, master, extack); + return felix->tag_proto_ops->change_conduit(ds, port, conduit, extack); } static int felix_set_ageing_time(struct dsa_switch *ds, @@ -953,7 +953,7 @@ static int felix_lag_join(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port)) return 0; - return felix_port_change_master(ds, port, lag.dev, extack); + return felix_port_change_conduit(ds, port, lag.dev, extack); } static int felix_lag_leave(struct dsa_switch *ds, int port, @@ -967,7 +967,7 @@ static int felix_lag_leave(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port)) return 0; - return felix_port_change_master(ds, port, lag.dev, NULL); + return felix_port_change_conduit(ds, port, lag.dev, NULL); } static int felix_lag_change(struct dsa_switch *ds, int port) @@ -1116,10 +1116,10 @@ static int felix_port_enable(struct dsa_switch *ds, int port, return 0; if (ocelot->npi >= 0) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); - if (felix_cpu_port_for_master(ds, master) != ocelot->npi) { - dev_err(ds->dev, "Multiple masters are not allowed\n"); + if (felix_cpu_port_for_conduit(ds, conduit) != ocelot->npi) { + dev_err(ds->dev, "Multiple conduits are not allowed\n"); return -EINVAL; } } @@ -2164,7 +2164,7 @@ const struct dsa_switch_ops felix_switch_ops = { .port_add_dscp_prio = felix_port_add_dscp_prio, .port_del_dscp_prio = felix_port_del_dscp_prio, .port_set_host_flood = felix_port_set_host_flood, - 
.port_change_master = felix_port_change_master, + .port_change_conduit = felix_port_change_conduit, }; EXPORT_SYMBOL_GPL(felix_switch_ops); @@ -2176,7 +2176,7 @@ struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) if (!dsa_is_user_port(ds, port)) return NULL; - return dsa_to_port(ds, port)->slave; + return dsa_to_port(ds, port)->user; } EXPORT_SYMBOL_GPL(felix_port_to_netdev); diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 1d4befe7cfe8..dbf5872fe367 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -77,9 +77,9 @@ struct felix_tag_proto_ops { int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); unsigned long (*get_host_fwd_mask)(struct dsa_switch *ds); - int (*change_master)(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack); + int (*change_conduit)(struct dsa_switch *ds, int port, - struct net_device *conduit, + struct netlink_ext_ack *extack); }; extern const struct dsa_switch_ops felix_switch_ops; diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 4ce68e655a63..ec57d9d52072 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -323,14 +323,14 @@ static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) mutex_lock(&mgmt_eth_data->mutex); - /* Check mgmt_master if is operational */ - if (!priv->mgmt_master) { + /* Check if the mgmt_conduit is operational */ + if (!priv->mgmt_conduit) { kfree_skb(skb); mutex_unlock(&mgmt_eth_data->mutex); return -EINVAL; } - skb->dev = priv->mgmt_master; + skb->dev = priv->mgmt_conduit; reinit_completion(&mgmt_eth_data->rw_done); @@ -375,14 +375,14 @@ static int qca8k_write_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) mutex_lock(&mgmt_eth_data->mutex); - /* Check mgmt_master if is operational */ - if (!priv->mgmt_master) { + /* Check if the mgmt_conduit is operational */ + if (!priv->mgmt_conduit) { kfree_skb(skb); mutex_unlock(&mgmt_eth_data->mutex); return -EINVAL; } - skb->dev = priv->mgmt_master; + skb->dev = priv->mgmt_conduit; reinit_completion(&mgmt_eth_data->rw_done); @@ -508,7 +508,7 @@ qca8k_bulk_read(void *ctx, const void *reg_buf, size_t reg_len, struct qca8k_priv *priv = ctx; u32 reg = *(u16 *)reg_buf; - if (priv->mgmt_master && + if (priv->mgmt_conduit && !qca8k_read_eth(priv, reg, val_buf, val_len)) return 0; @@ -531,7 +531,7 @@ qca8k_bulk_gather_write(void *ctx, const void *reg_buf, size_t reg_len, u32 reg = *(u16 *)reg_buf; u32 *val = (u32 *)val_buf; - if (priv->mgmt_master && + if (priv->mgmt_conduit && !qca8k_write_eth(priv, reg, val, val_len)) return 0; @@ -626,7 +626,7 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, struct sk_buff *write_skb, *clear_skb, *read_skb; struct qca8k_mgmt_eth_data *mgmt_eth_data; u32 write_val, clear_val = 0, val; - struct net_device *mgmt_master; + struct net_device *mgmt_conduit; int ret, ret1; bool ack; @@ -683,18 +683,18 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, */ mutex_lock(&mgmt_eth_data->mutex); - /* Check if mgmt_master is operational */ - mgmt_master = priv->mgmt_master; - if (!mgmt_master) { + /* Check if mgmt_conduit is operational */ + mgmt_conduit = priv->mgmt_conduit; + if (!mgmt_conduit) { mutex_unlock(&mgmt_eth_data->mutex); mutex_unlock(&priv->bus->mdio_lock); ret = -EINVAL; - goto err_mgmt_master; + goto err_mgmt_conduit; } - read_skb->dev = mgmt_master; - clear_skb->dev =
mgmt_master; - write_skb->dev = mgmt_master; + read_skb->dev = mgmt_conduit; + clear_skb->dev = mgmt_conduit; + write_skb->dev = mgmt_conduit; reinit_completion(&mgmt_eth_data->rw_done); @@ -780,7 +780,7 @@ exit: return ret; /* Error handling before lock */ -err_mgmt_master: +err_mgmt_conduit: kfree_skb(read_skb); err_read_skb: kfree_skb(clear_skb); @@ -959,12 +959,12 @@ qca8k_mdio_register(struct qca8k_priv *priv) ds->dst->index, ds->index); bus->parent = ds->dev; bus->phy_mask = ~ds->phys_mii_mask; - ds->slave_mii_bus = bus; + ds->user_mii_bus = bus; /* Check if the devicetree declare the port:phy mapping */ mdio = of_get_child_by_name(priv->dev->of_node, "mdio"); if (of_device_is_available(mdio)) { - bus->name = "qca8k slave mii"; + bus->name = "qca8k user mii"; bus->read = qca8k_internal_mdio_read; bus->write = qca8k_internal_mdio_write; return devm_of_mdiobus_register(priv->dev, bus, mdio); @@ -973,7 +973,7 @@ qca8k_mdio_register(struct qca8k_priv *priv) /* If a mapping can't be found the legacy mapping is used, * using the qca8k_port_to_phy function */ - bus->name = "qca8k-legacy slave mii"; + bus->name = "qca8k-legacy user mii"; bus->read = qca8k_legacy_mdio_read; bus->write = qca8k_legacy_mdio_write; return devm_mdiobus_register(priv->dev, bus); @@ -1728,10 +1728,10 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port, } static void -qca8k_master_change(struct dsa_switch *ds, const struct net_device *master, - bool operational) +qca8k_conduit_change(struct dsa_switch *ds, const struct net_device *conduit, + bool operational) { - struct dsa_port *dp = master->dsa_ptr; + struct dsa_port *dp = conduit->dsa_ptr; struct qca8k_priv *priv = ds->priv; /* Ethernet MIB/MDIO is only supported for CPU port 0 */ @@ -1741,7 +1741,7 @@ qca8k_master_change(struct dsa_switch *ds, const struct net_device *master, mutex_lock(&priv->mgmt_eth_data.mutex); mutex_lock(&priv->mib_eth_data.mutex); - priv->mgmt_master = operational ? (struct net_device *)master : NULL; + priv->mgmt_conduit = operational ? (struct net_device *)conduit : NULL; mutex_unlock(&priv->mib_eth_data.mutex); mutex_unlock(&priv->mgmt_eth_data.mutex); @@ -2016,7 +2016,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .get_phy_flags = qca8k_get_phy_flags, .port_lag_join = qca8k_port_lag_join, .port_lag_leave = qca8k_port_lag_leave, - .master_state_change = qca8k_master_change, + .conduit_state_change = qca8k_conduit_change, .connect_tag_protocol = qca8k_connect_tag_protocol, }; diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c index 9ff0a3c1cb91..9243eff8918d 100644 --- a/drivers/net/dsa/qca/qca8k-common.c +++ b/drivers/net/dsa/qca/qca8k-common.c @@ -499,7 +499,7 @@ void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, u32 hi = 0; int ret; - if (priv->mgmt_master && priv->info->ops->autocast_mib && + if (priv->mgmt_conduit && priv->info->ops->autocast_mib && priv->info->ops->autocast_mib(ds, port, data) > 0) return; @@ -761,7 +761,7 @@ int qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) int ret; /* We have only have a general MTU setting. - * DSA always set the CPU port's MTU to the largest MTU of the slave + * DSA always set the CPU port's MTU to the largest MTU of the user * ports. * Setting MTU just for the CPU port is sufficient to correctly set a * value for every port. 
diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c index e8c16e76e34b..90e30c2909e4 100644 --- a/drivers/net/dsa/qca/qca8k-leds.c +++ b/drivers/net/dsa/qca/qca8k-leds.c @@ -356,8 +356,8 @@ static struct device *qca8k_cled_hw_control_get_device(struct led_classdev *ldev dp = dsa_to_port(priv->ds, qca8k_phy_to_port(led->port_num)); if (!dp) return NULL; - if (dp->slave) - return &dp->slave->dev; + if (dp->user) + return &dp->user->dev; return NULL; } @@ -429,7 +429,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p init_data.default_label = ":port"; init_data.fwnode = led; init_data.devname_mandatory = true; - init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->slave_mii_bus->id, + init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->user_mii_bus->id, port_num); if (!init_data.devicename) return -ENOMEM; diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index 8f88b7db384d..2ac7e88f8da5 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -458,7 +458,7 @@ struct qca8k_priv { struct mutex reg_mutex; struct device *dev; struct gpio_desc *reset_gpio; - struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */ + struct net_device *mgmt_conduit; /* Track if mdio/mib Ethernet is available */ struct qca8k_mgmt_eth_data mgmt_eth_data; struct qca8k_mib_eth_data mib_eth_data; struct qca8k_mdio_cache mdio_cache; diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index bfd11591faf4..755546ed8db6 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -378,25 +378,25 @@ static int realtek_smi_setup_mdio(struct dsa_switch *ds) return -ENODEV; } - priv->slave_mii_bus = devm_mdiobus_alloc(priv->dev); - if (!priv->slave_mii_bus) { + priv->user_mii_bus = devm_mdiobus_alloc(priv->dev); + if (!priv->user_mii_bus) { ret = -ENOMEM; goto err_put_node; } - priv->slave_mii_bus->priv = priv; - priv->slave_mii_bus->name = "SMI slave MII"; - priv->slave_mii_bus->read = realtek_smi_mdio_read; - priv->slave_mii_bus->write = realtek_smi_mdio_write; - snprintf(priv->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d", + priv->user_mii_bus->priv = priv; + priv->user_mii_bus->name = "SMI user MII"; + priv->user_mii_bus->read = realtek_smi_mdio_read; + priv->user_mii_bus->write = realtek_smi_mdio_write; + snprintf(priv->user_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index); - priv->slave_mii_bus->dev.of_node = mdio_np; - priv->slave_mii_bus->parent = priv->dev; - ds->slave_mii_bus = priv->slave_mii_bus; + priv->user_mii_bus->dev.of_node = mdio_np; + priv->user_mii_bus->parent = priv->dev; + ds->user_mii_bus = priv->user_mii_bus; - ret = devm_of_mdiobus_register(priv->dev, priv->slave_mii_bus, mdio_np); + ret = devm_of_mdiobus_register(priv->dev, priv->user_mii_bus, mdio_np); if (ret) { dev_err(priv->dev, "unable to register MDIO bus %s\n", - priv->slave_mii_bus->id); + priv->user_mii_bus->id); goto err_put_node; } @@ -514,8 +514,8 @@ static void realtek_smi_remove(struct platform_device *pdev) return; dsa_unregister_switch(priv->ds); - if (priv->slave_mii_bus) - of_node_put(priv->slave_mii_bus->dev.of_node); + if (priv->user_mii_bus) + of_node_put(priv->user_mii_bus->dev.of_node); /* leave the device reset asserted */ if (priv->reset) diff --git a/drivers/net/dsa/realtek/realtek.h b/drivers/net/dsa/realtek/realtek.h index 4fa7c6ba874a..790488e9c667 100644 --- a/drivers/net/dsa/realtek/realtek.h +++ 
b/drivers/net/dsa/realtek/realtek.h @@ -54,7 +54,7 @@ struct realtek_priv { struct regmap *map; struct regmap *map_nolock; struct mutex map_lock; - struct mii_bus *slave_mii_bus; + struct mii_bus *user_mii_bus; struct mii_bus *bus; int mdio_addr; diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index d171c18dd354..0875e4fc9f57 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -1144,7 +1144,7 @@ static int rtl8365mb_port_change_mtu(struct dsa_switch *ds, int port, int frame_size; /* When a new MTU is set, DSA always sets the CPU port's MTU to the - * largest MTU of the slave ports. Because the switch only has a global + * largest MTU of the user ports. Because the switch only has a global * RX length register, only allowing CPU port here is enough. */ if (!dsa_is_cpu_port(ds, port)) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 1a367e64bc3b..74cee39d73df 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2688,7 +2688,7 @@ static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot, } /* Transfer skb to the host port. */ - dsa_enqueue_skb(skb, dsa_to_port(ds, port)->slave); + dsa_enqueue_skb(skb, dsa_to_port(ds, port)->user); /* Wait until the switch has processed the frame */ do { @@ -3081,7 +3081,7 @@ static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port, * ref_clk pin. So port clocking needs to be initialized early, before * connecting to PHYs is attempted, otherwise they won't respond through MDIO. * Setting correct PHY link speed does not matter now. - * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY + * But dsa_user_phy_setup is called later than sja1105_setup, so the PHY * bindings are not yet parsed by DSA core. We need to parse early so that we * can populate the xMII mode parameters table. 
*/ diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 5b02e9e426fd..96db032b478f 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -554,7 +554,7 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port, unsigned int val = XRS_HSR_CFG_HSR_PRP; struct dsa_port *partner = NULL, *dp; struct xrs700x *priv = ds->priv; - struct net_device *slave; + struct net_device *user; int ret, i, hsr_pair[2]; enum hsr_version ver; bool fwd = false; @@ -638,8 +638,8 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port, hsr_pair[0] = port; hsr_pair[1] = partner->index; for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) { - slave = dsa_to_port(ds, hsr_pair[i])->slave; - slave->features |= XRS7000X_SUPPORTED_HSR_FEATURES; + user = dsa_to_port(ds, hsr_pair[i])->user; + user->features |= XRS7000X_SUPPORTED_HSR_FEATURES; } return 0; @@ -650,7 +650,7 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port, { struct dsa_port *partner = NULL, *dp; struct xrs700x *priv = ds->priv; - struct net_device *slave; + struct net_device *user; int i, hsr_pair[2]; unsigned int val; @@ -692,8 +692,8 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port, hsr_pair[0] = port; hsr_pair[1] = partner->index; for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) { - slave = dsa_to_port(ds, hsr_pair[i])->slave; - slave->features &= ~XRS7000X_SUPPORTED_HSR_FEATURES; + user = dsa_to_port(ds, hsr_pair[i])->user; + user->features &= ~XRS7000X_SUPPORTED_HSR_FEATURES; } return 0; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index ab096795e805..c9faa8540859 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2430,7 +2430,7 @@ static int bcm_sysport_netdevice_event(struct notifier_block *nb, if (dev->netdev_ops != &bcm_sysport_netdev_ops) return NOTIFY_DONE; - if (!dsa_slave_dev_check(info->upper_dev)) + if (!dsa_user_dev_check(info->upper_dev)) return NOTIFY_DONE; if (info->linking) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 60d49b0f595f..3cf6589cfdac 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3329,7 +3329,7 @@ static int mtk_device_event(struct notifier_block *n, unsigned long event, void return NOTIFY_DONE; found: - if (!dsa_slave_dev_check(dev)) + if (!dsa_user_dev_check(dev)) return NOTIFY_DONE; if (__ethtool_get_link_ksettings(dev, &s)) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index e073d2b5542c..fbb5e9d5af13 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -175,7 +175,7 @@ mtk_flow_get_dsa_port(struct net_device **dev) if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK) return -ENODEV; - *dev = dsa_port_to_master(dp); + *dev = dsa_port_to_conduit(dp); return dp->index; #else diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index c177322f793d..b9dd35d4b8f5 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -28,7 +28,7 @@ /* Source and Destination MAC of follow-up meta frames. * Whereas the choice of SMAC only affects the unique identification of the * switch as sender of meta frames, the DMAC must be an address that is present - * in the DSA master port's multicast MAC filter. 
+ * in the DSA conduit port's multicast MAC filter. * 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588 * over L2 use this address for some purpose already. */ diff --git a/include/net/dsa.h b/include/net/dsa.h index d98439ea6146..82135fbdb1e6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -102,11 +102,11 @@ struct dsa_device_ops { const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC - * address, in which case the DSA master would drop packets on ingress + * address, in which case the DSA conduit would drop packets on ingress * if what it understands out of the destination MAC address is not in * its RX filter. */ - bool promisc_on_master; + bool promisc_on_conduit; }; struct dsa_lag { @@ -236,12 +236,12 @@ struct dsa_bridge { }; struct dsa_port { - /* A CPU port is physically connected to a master device. - * A user port exposed to userspace has a slave device. + /* A CPU port is physically connected to a conduit device. A user port + * exposes a network device to user-space, called 'user' here. */ union { - struct net_device *master; - struct net_device *slave; + struct net_device *conduit; + struct net_device *user; }; /* Copy of the tagging protocol operations, for quicker access @@ -249,7 +249,7 @@ struct dsa_port { */ const struct dsa_device_ops *tag_ops; - /* Copies for faster access in master receive hot path */ + /* Copies for faster access in conduit receive hot path */ struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); @@ -281,9 +281,9 @@ struct dsa_port { u8 lag_tx_enabled:1; - /* Master state bits, valid only on CPU ports */ - u8 master_admin_up:1; - u8 master_oper_up:1; + /* conduit state bits, valid only on CPU ports */ + u8 conduit_admin_up:1; + u8 conduit_oper_up:1; /* Valid only on user ports */ u8 cpu_port_in_lag:1; @@ -303,7 +303,7 @@ struct dsa_port { struct list_head list; /* - * Original copy of the master netdev ethtool_ops + * Original copy of the conduit netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -452,10 +452,10 @@ struct dsa_switch { const struct dsa_switch_ops *ops; /* - * Slave mii_bus and devices for the individual ports. + * User mii_bus and devices for the individual ports. 
+ * User mii_bus and devices for the individual ports.
*/ u32 phys_mii_mask; - struct mii_bus *slave_mii_bus; + struct mii_bus *user_mii_bus; /* Ageing Time limits in msecs */ unsigned int ageing_time_min; @@ -520,10 +520,10 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } -static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp) { - return dsa_port_is_cpu(dp) && dp->master_admin_up && - dp->master_oper_up; + return dsa_port_is_cpu(dp) && dp->conduit_admin_up && + dp->conduit_oper_up; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) @@ -713,12 +713,12 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp, return dsa_port_lag_dev_get(dp) == lag->dev; } -static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) +static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp) { if (dp->cpu_port_in_lag) return dsa_port_lag_dev_get(dp->cpu_dp); - return dp->cpu_dp->master; + return dp->cpu_dp->conduit; } static inline @@ -732,7 +732,7 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) else if (dp->hsr_dev) return dp->hsr_dev; - return dp->slave; + return dp->user; } static inline struct net_device * @@ -834,9 +834,9 @@ struct dsa_switch_ops { int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); - int (*port_change_master)(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack); + int (*port_change_conduit)(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -1233,11 +1233,11 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); /* - * DSA master tracking operations + * DSA conduit tracking operations */ - void (*master_state_change)(struct dsa_switch *ds, - const struct net_device *master, - bool operational); + void (*conduit_state_change)(struct dsa_switch *ds, + const struct net_device *conduit, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -1374,9 +1374,9 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) #endif /* CONFIG_PM_SLEEP */ #if IS_ENABLED(CONFIG_NET_DSA) -bool dsa_slave_dev_check(const struct net_device *dev); +bool dsa_user_dev_check(const struct net_device *dev); #else -static inline bool dsa_slave_dev_check(const struct net_device *dev) +static inline bool dsa_user_dev_check(const struct net_device *dev) { return false; } diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h index 361811750a54..6f384897f287 100644 --- a/include/net/dsa_stubs.h +++ b/include/net/dsa_stubs.h @@ -13,14 +13,14 @@ extern const struct dsa_stubs *dsa_stubs; struct dsa_stubs { - int (*master_hwtstamp_validate)(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack); + int (*conduit_hwtstamp_validate)(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); }; -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { if (!netdev_uses_dsa(dev)) return 0; @@ -29,18 
+29,18 @@ static inline int dsa_master_hwtstamp_validate(struct net_device *dev, * netdev_uses_dsa() returns true, the dsa_core module is still * registered, and so, dsa_unregister_stubs() couldn't have run. * For netdev_uses_dsa() to start returning false, it would imply that - * dsa_master_teardown() has executed, which requires rtnl_lock(). + * dsa_conduit_teardown() has executed, which requires rtnl_lock(). */ ASSERT_RTNL(); - return dsa_stubs->master_hwtstamp_validate(dev, config, extack); + return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack); } #else -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return 0; } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b46aedc36939..feeddf95f450 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -382,7 +382,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (err) return err; - err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack); + err = dsa_conduit_hwtstamp_validate(dev, &kernel_cfg, &extack); if (err) { if (extack._msg) netdev_err(dev, "%s\n", extack._msg); diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 12e305824a96..8a1894a42552 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -8,16 +8,16 @@ endif # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += \ + conduit.o \ devlink.o \ dsa.o \ - master.o \ netlink.o \ port.o \ - slave.o \ switch.o \ tag.o \ tag_8021q.o \ - trace.o + trace.o \ + user.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c new file mode 100644 index 000000000000..3dfdb3cb47dc --- /dev/null +++ b/net/dsa/conduit.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Handling of a conduit device, switching frames via its switch fabric CPU port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. 
+ * Vivien Didelot + */ + +#include +#include +#include +#include + +#include "conduit.h" +#include "dsa.h" +#include "port.h" +#include "tag.h" + +static int dsa_conduit_get_regs_len(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int ret = 0; + int len; + + if (ops->get_regs_len) { + len = ops->get_regs_len(dev); + if (len < 0) + return len; + ret += len; + } + + ret += sizeof(struct ethtool_drvinfo); + ret += sizeof(struct ethtool_regs); + + if (ds->ops->get_regs_len) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return len; + ret += len; + } + + return ret; +} + +static void dsa_conduit_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + struct ethtool_drvinfo *cpu_info; + struct ethtool_regs *cpu_regs; + int port = cpu_dp->index; + int len; + + if (ops->get_regs_len && ops->get_regs) { + len = ops->get_regs_len(dev); + if (len < 0) + return; + regs->len = len; + ops->get_regs(dev, regs, data); + data += regs->len; + } + + cpu_info = (struct ethtool_drvinfo *)data; + strscpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); + data += sizeof(*cpu_info); + cpu_regs = (struct ethtool_regs *)data; + data += sizeof(*cpu_regs); + + if (ds->ops->get_regs_len && ds->ops->get_regs) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return; + cpu_regs->len = len; + ds->ops->get_regs(ds, port, cpu_regs, data); + } +} + +static void dsa_conduit_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int count = 0; + + if (ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); + } + + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, port, data + count); +} + +static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int count = 0; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + count = phy_ethtool_get_sset_count(dev->phydev); + if (count >= 0) + phy_ethtool_get_stats(dev->phydev, stats, data); + } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { + count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); + ops->get_ethtool_phy_stats(dev, stats, data); + } + + if (count < 0) + count = 0; + + if (ds->ops->get_ethtool_phy_stats) + ds->ops->get_ethtool_phy_stats(ds, port, data + count); +} + +static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int count = 0; + + if (sset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) + count = phy_ethtool_get_sset_count(dev->phydev); + else if (ops->get_sset_count) + count = ops->get_sset_count(dev, sset); + + if (count < 0) + count = 0; + + if (ds->ops->get_sset_count) + count += 
ds->ops->get_sset_count(ds, cpu_dp->index, sset); + + return count; +} + +static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int len = ETH_GSTRING_LEN; + int mcount = 0, count, i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", port); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (stringset == ETH_SS_PHY_STATS && dev->phydev && + !ops->get_ethtool_phy_stats) { + mcount = phy_ethtool_get_sset_count(dev->phydev); + if (mcount < 0) + mcount = 0; + else + phy_ethtool_get_strings(dev->phydev, data); + } else if (ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, stringset); + if (mcount < 0) + mcount = 0; + ops->get_strings(dev, stringset, data); + } + + if (ds->ops->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->ops->get_strings(ds, port, stringset, ndata); + count = ds->ops->get_sset_count(ds, port, stringset); + if (count < 0) + return; + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + +/* Deny PTP operations on conduit if there is at least one switch in the tree + * that is PTP capable. + */ +int __dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + dst = ds->dst; + + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_supports_hwtstamp(dp)) { + NL_SET_ERR_MSG(extack, + "HW timestamping not allowed on DSA conduit when switch supports the operation"); + return -EBUSY; + } + } + + return 0; +} + +static int dsa_conduit_ethtool_setup(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct ethtool_ops *ops; + + if (netif_is_lag_master(dev)) + return 0; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + cpu_dp->orig_ethtool_ops = dev->ethtool_ops; + if (cpu_dp->orig_ethtool_ops) + memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); + + ops->get_regs_len = dsa_conduit_get_regs_len; + ops->get_regs = dsa_conduit_get_regs; + ops->get_sset_count = dsa_conduit_get_sset_count; + ops->get_ethtool_stats = dsa_conduit_get_ethtool_stats; + ops->get_strings = dsa_conduit_get_strings; + ops->get_ethtool_phy_stats = dsa_conduit_get_ethtool_phy_stats; + + dev->ethtool_ops = ops; + + return 0; +} + +static void dsa_conduit_ethtool_teardown(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + if (netif_is_lag_master(dev)) + return; + + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; +} + +/* Keep the conduit always promiscuous if the tagging protocol requires that + * (garbles MAC DA) or if it doesn't support unicast filtering, case in which + * it would revert to promiscuous mode as soon as we call dev_uc_add() on it + * anyway. 
+ */ +static void dsa_conduit_set_promiscuity(struct net_device *dev, int inc) +{ + const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; + + if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_conduit) + return; + + ASSERT_RTNL(); + + dev_set_promiscuity(dev, inc); +} + +static ssize_t tagging_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_device *dev = to_net_dev(d); + struct dsa_port *cpu_dp = dev->dsa_ptr; + + return sysfs_emit(buf, "%s\n", + dsa_tag_protocol_to_str(cpu_dp->tag_ops)); +} + +static ssize_t tagging_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + const struct dsa_device_ops *new_tag_ops, *old_tag_ops; + const char *end = strchrnul(buf, '\n'), *name; + struct net_device *dev = to_net_dev(d); + struct dsa_port *cpu_dp = dev->dsa_ptr; + size_t len = end - buf; + int err; + + /* Empty string passed */ + if (!len) + return -ENOPROTOOPT; + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + old_tag_ops = cpu_dp->tag_ops; + new_tag_ops = dsa_tag_driver_get_by_name(name); + kfree(name); + /* Bad tagger name? */ + if (IS_ERR(new_tag_ops)) + return PTR_ERR(new_tag_ops); + + if (new_tag_ops == old_tag_ops) + /* Drop the temporarily held duplicate reference, since + * the DSA switch tree uses this tagger. + */ + goto out; + + err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops, + old_tag_ops); + if (err) { + /* On failure the old tagger is restored, so we don't need the + * driver for the new one. + */ + dsa_tag_driver_put(new_tag_ops); + return err; + } + + /* On success we no longer need the module for the old tagging protocol + */ +out: + dsa_tag_driver_put(old_tag_ops); + return count; +} +static DEVICE_ATTR_RW(tagging); + +static struct attribute *dsa_user_attrs[] = { + &dev_attr_tagging.attr, + NULL +}; + +static const struct attribute_group dsa_group = { + .name = "dsa", + .attrs = dsa_user_attrs, +}; + +static void dsa_conduit_reset_mtu(struct net_device *dev) +{ + int err; + + err = dev_set_mtu(dev, ETH_DATA_LEN); + if (err) + netdev_dbg(dev, + "Unable to reset MTU to exclude DSA overheads\n"); +} + +int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp) +{ + const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; + int mtu, ret; + + mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); + + /* The DSA conduit must use SET_NETDEV_DEV for this to work. */ + if (!netif_is_lag_master(dev)) { + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + } + + /* The switch driver may not implement ->port_change_mtu(), case in + * which dsa_user_change_mtu() will not update the conduit MTU either, + * so we need to do that here. + */ + ret = dev_set_mtu(dev, mtu); + if (ret) + netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", + ret, mtu); + + /* If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. 
+ */ + wmb(); + + dev->dsa_ptr = cpu_dp; + + dsa_conduit_set_promiscuity(dev, 1); + + ret = dsa_conduit_ethtool_setup(dev); + if (ret) + goto out_err_reset_promisc; + + ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); + if (ret) + goto out_err_ethtool_teardown; + + return ret; + +out_err_ethtool_teardown: + dsa_conduit_ethtool_teardown(dev); +out_err_reset_promisc: + dsa_conduit_set_promiscuity(dev, -1); + return ret; +} + +void dsa_conduit_teardown(struct net_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &dsa_group); + dsa_conduit_ethtool_teardown(dev); + dsa_conduit_reset_mtu(dev); + dsa_conduit_set_promiscuity(dev, -1); + + dev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); +} + +int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + bool conduit_setup = false; + int err; + + if (!netdev_uses_dsa(lag_dev)) { + err = dsa_conduit_setup(lag_dev, cpu_dp); + if (err) + return err; + + conduit_setup = true; + } + + err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); + if (err) { + NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG"); + goto out_conduit_teardown; + } + + return 0; + +out_conduit_teardown: + if (conduit_setup) + dsa_conduit_teardown(lag_dev); + return err; +} + +/* Tear down a conduit if there isn't any other user port on it, + * optionally also destroying LAG information. + */ +void dsa_conduit_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp) +{ + struct net_device *upper; + struct list_head *iter; + + dsa_port_lag_leave(cpu_dp, lag_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) + if (dsa_user_dev_check(upper)) + return; + + dsa_conduit_teardown(lag_dev); +} diff --git a/net/dsa/conduit.h b/net/dsa/conduit.h new file mode 100644 index 000000000000..31f8834f54bb --- /dev/null +++ b/net/dsa/conduit.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_CONDUIT_H +#define __DSA_CONDUIT_H + +struct dsa_port; +struct net_device; +struct netdev_lag_upper_info; +struct netlink_ext_ack; + +int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_conduit_teardown(struct net_device *dev); +int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_conduit_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); +int __dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); + +#endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ccbdb98109f8..ac7be864e80d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -20,14 +20,14 @@ #include #include +#include "conduit.h" #include "devlink.h" #include "dsa.h" -#include "master.h" #include "netlink.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag.h" +#include "user.h" #define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG @@ -365,18 +365,18 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst) { struct device_node *ethernet; - struct net_device *master; + 
struct net_device *conduit; struct dsa_port *cpu_dp; cpu_dp = dsa_tree_find_first_cpu(dst); ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); - master = of_find_net_device_by_node(ethernet); + conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - return master; + return conduit; } /* Assign the default CPU port (the first one in the tree) to all ports of the @@ -517,7 +517,7 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_USER: of_get_mac_address(dp->dn, dp->mac); - err = dsa_slave_create(dp); + err = dsa_user_create(dp); break; } @@ -554,9 +554,9 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_shared_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - if (dp->slave) { - dsa_slave_destroy(dp->slave); - dp->slave = NULL; + if (dp->user) { + dsa_user_destroy(dp->user); + dp->user = NULL; } break; } @@ -632,9 +632,9 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (ds->setup) return 0; - /* Initialize ds->phys_mii_mask before registering the slave MDIO bus + /* Initialize ds->phys_mii_mask before registering the user MDIO bus * driver and before ops->setup() has run, since the switch drivers and - * the slave MDIO bus driver rely on these values for probing PHY + * the user MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask |= dsa_user_ports(ds); @@ -657,21 +657,21 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) goto teardown; - if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = mdiobus_alloc(); - if (!ds->slave_mii_bus) { + if (!ds->user_mii_bus && ds->ops->phy_read) { + ds->user_mii_bus = mdiobus_alloc(); + if (!ds->user_mii_bus) { err = -ENOMEM; goto teardown; } - dsa_slave_mii_bus_init(ds); + dsa_user_mii_bus_init(ds); dn = of_get_child_by_name(ds->dev->of_node, "mdio"); - err = of_mdiobus_register(ds->slave_mii_bus, dn); + err = of_mdiobus_register(ds->user_mii_bus, dn); of_node_put(dn); if (err < 0) - goto free_slave_mii_bus; + goto free_user_mii_bus; } dsa_switch_devlink_register(ds); @@ -679,9 +679,9 @@ static int dsa_switch_setup(struct dsa_switch *ds) ds->setup = true; return 0; -free_slave_mii_bus: - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_free(ds->slave_mii_bus); +free_user_mii_bus: + if (ds->user_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->user_mii_bus); teardown: if (ds->ops->teardown) ds->ops->teardown(ds); @@ -699,10 +699,10 @@ static void dsa_switch_teardown(struct dsa_switch *ds) dsa_switch_devlink_unregister(ds); - if (ds->slave_mii_bus && ds->ops->phy_read) { - mdiobus_unregister(ds->slave_mii_bus); - mdiobus_free(ds->slave_mii_bus); - ds->slave_mii_bus = NULL; + if (ds->user_mii_bus && ds->ops->phy_read) { + mdiobus_unregister(ds->user_mii_bus); + mdiobus_free(ds->user_mii_bus); + ds->user_mii_bus = NULL; } dsa_switch_teardown_tag_protocol(ds); @@ -793,7 +793,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) return err; } -static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +static int dsa_tree_setup_conduit(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp; int err = 0; @@ -801,18 +801,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) rtnl_lock(); dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; - bool admin_up = (master->flags & IFF_UP) && - !qdisc_tx_is_noop(master); + struct net_device *conduit = cpu_dp->conduit; + bool admin_up = (conduit->flags & IFF_UP) && + !qdisc_tx_is_noop(conduit); - err = 
dsa_master_setup(master, cpu_dp); + err = dsa_conduit_setup(conduit, cpu_dp); if (err) break; - /* Replay master state event */ - dsa_tree_master_admin_state_change(dst, master, admin_up); - dsa_tree_master_oper_state_change(dst, master, - netif_oper_up(master)); + /* Replay conduit state event */ + dsa_tree_conduit_admin_state_change(dst, conduit, admin_up); + dsa_tree_conduit_oper_state_change(dst, conduit, + netif_oper_up(conduit)); } rtnl_unlock(); @@ -820,22 +820,22 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) return err; } -static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) +static void dsa_tree_teardown_conduit(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp; rtnl_lock(); dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; + struct net_device *conduit = cpu_dp->conduit; /* Synthesizing an "admin down" state is sufficient for - * the switches to get a notification if the master is + * the switches to get a notification if the conduit is * currently up and running. */ - dsa_tree_master_admin_state_change(dst, master, false); + dsa_tree_conduit_admin_state_change(dst, conduit, false); - dsa_master_teardown(master); + dsa_conduit_teardown(conduit); } rtnl_unlock(); @@ -894,13 +894,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_switches; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_conduit(dst); if (err) goto teardown_ports; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_master; + goto teardown_conduit; dst->setup = true; @@ -908,8 +908,8 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_master: - dsa_tree_teardown_master(dst); +teardown_conduit: + dsa_tree_teardown_conduit(dst); teardown_ports: dsa_tree_teardown_ports(dst); teardown_switches: @@ -929,7 +929,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_master(dst); + dsa_tree_teardown_conduit(dst); dsa_tree_teardown_ports(dst); @@ -978,7 +978,7 @@ out_disconnect: return err; } -/* Since the dsa/tagging sysfs device attribute is per master, the assumption +/* Since the dsa/tagging sysfs device attribute is per conduit, the assumption * is that all DSA switches within a tree share the same tagger, otherwise * they would have formed disjoint trees (different "dsa,member" values). */ @@ -999,10 +999,10 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, * restriction, there needs to be another mutex which serializes this. 
*/ dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp)->flags & IFF_UP) + if (dsa_port_to_conduit(dp)->flags & IFF_UP) goto out_unlock; - if (dp->slave->flags & IFF_UP) + if (dp->user->flags & IFF_UP) goto out_unlock; } @@ -1028,62 +1028,62 @@ out_unlock: return err; } -static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, - struct net_device *master) +static void dsa_tree_conduit_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit) { - struct dsa_notifier_master_state_info info; - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_notifier_conduit_state_info info; + struct dsa_port *cpu_dp = conduit->dsa_ptr; - info.master = master; - info.operational = dsa_port_master_is_operational(cpu_dp); + info.conduit = conduit; + info.operational = dsa_port_conduit_is_operational(cpu_dp); - dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); + dsa_tree_notify(dst, DSA_NOTIFIER_CONDUIT_STATE_CHANGE, &info); } -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up) +void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; bool notify = false; - /* Don't keep track of admin state on LAG DSA masters, - * but rather just of physical DSA masters + /* Don't keep track of admin state on LAG DSA conduits, + * but rather just of physical DSA conduits */ - if (netif_is_lag_master(master)) + if (netif_is_lag_master(conduit)) return; - if ((dsa_port_master_is_operational(cpu_dp)) != - (up && cpu_dp->master_oper_up)) + if ((dsa_port_conduit_is_operational(cpu_dp)) != + (up && cpu_dp->conduit_oper_up)) notify = true; - cpu_dp->master_admin_up = up; + cpu_dp->conduit_admin_up = up; if (notify) - dsa_tree_master_state_change(dst, master); + dsa_tree_conduit_state_change(dst, conduit); } -void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up) +void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; bool notify = false; - /* Don't keep track of oper state on LAG DSA masters, - * but rather just of physical DSA masters + /* Don't keep track of oper state on LAG DSA conduits, + * but rather just of physical DSA conduits */ - if (netif_is_lag_master(master)) + if (netif_is_lag_master(conduit)) return; - if ((dsa_port_master_is_operational(cpu_dp)) != - (cpu_dp->master_admin_up && up)) + if ((dsa_port_conduit_is_operational(cpu_dp)) != + (cpu_dp->conduit_admin_up && up)) notify = true; - cpu_dp->master_oper_up = up; + cpu_dp->conduit_oper_up = up; if (notify) - dsa_tree_master_state_change(dst, master); + dsa_tree_conduit_state_change(dst, conduit); } static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) @@ -1129,7 +1129,7 @@ static int dsa_port_parse_dsa(struct dsa_port *dp) } static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, - struct net_device *master) + struct net_device *conduit) { enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE; struct dsa_switch *mds, *ds = dp->ds; @@ -1140,21 +1140,21 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, * happens the switch driver may want to know if its tagging protocol * is going to work in such a configuration. 
*/ - if (dsa_slave_dev_check(master)) { - mdp = dsa_slave_to_port(master); + if (dsa_user_dev_check(conduit)) { + mdp = dsa_user_to_port(conduit); mds = mdp->ds; mdp_upstream = dsa_upstream_port(mds, mdp->index); tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream, DSA_TAG_PROTO_NONE); } - /* If the master device is not itself a DSA slave in a disjoint DSA + /* If the conduit device is not itself a DSA user in a disjoint DSA * tree, then return immediately. */ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); } -static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *conduit, const char *user_protocol) { const struct dsa_device_ops *tag_ops = NULL; @@ -1163,7 +1163,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, enum dsa_tag_protocol default_proto; /* Find out which protocol the switch would prefer. */ - default_proto = dsa_get_tag_protocol(dp, master); + default_proto = dsa_get_tag_protocol(dp, conduit); if (dst->default_proto) { if (dst->default_proto != default_proto) { dev_err(ds->dev, @@ -1218,7 +1218,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, dst->tag_ops = tag_ops; } - dp->master = master; + dp->conduit = conduit; dp->type = DSA_PORT_TYPE_CPU; dsa_port_set_tag_protocol(dp, dst->tag_ops); dp->dst = dst; @@ -1248,16 +1248,16 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) dp->dn = dn; if (ethernet) { - struct net_device *master; + struct net_device *conduit; const char *user_protocol; - master = of_find_net_device_by_node(ethernet); + conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - if (!master) + if (!conduit) return -EPROBE_DEFER; user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); - return dsa_port_parse_cpu(dp, master, user_protocol); + return dsa_port_parse_cpu(dp, conduit, user_protocol); } if (link) @@ -1412,15 +1412,15 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { if (!strcmp(name, "cpu")) { - struct net_device *master; + struct net_device *conduit; - master = dsa_dev_to_net_device(dev); - if (!master) + conduit = dsa_dev_to_net_device(dev); + if (!conduit) return -EPROBE_DEFER; - dev_put(master); + dev_put(conduit); - return dsa_port_parse_cpu(dp, master, NULL); + return dsa_port_parse_cpu(dp, conduit, NULL); } if (!strcmp(name, "dsa")) @@ -1566,14 +1566,14 @@ void dsa_unregister_switch(struct dsa_switch *ds) } EXPORT_SYMBOL_GPL(dsa_unregister_switch); -/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is +/* If the DSA conduit chooses to unregister its net_device on .shutdown, DSA is * blocking that operation from completion, due to the dev_hold taken inside - * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of - * the DSA master, so that the system can reboot successfully. + * netdev_upper_dev_link. Unlink the DSA user interfaces from being uppers of + * the DSA conduit, so that the system can reboot successfully. 
*/ void dsa_switch_shutdown(struct dsa_switch *ds) { - struct net_device *master, *slave_dev; + struct net_device *conduit, *user_dev; struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -1584,17 +1584,17 @@ void dsa_switch_shutdown(struct dsa_switch *ds) rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { - master = dsa_port_to_master(dp); - slave_dev = dp->slave; + conduit = dsa_port_to_conduit(dp); + user_dev = dp->user; - netdev_upper_dev_unlink(master, slave_dev); + netdev_upper_dev_unlink(conduit, user_dev); } - /* Disconnect from further netdevice notifiers on the master, + /* Disconnect from further netdevice notifiers on the conduit, * since netdev_uses_dsa() will now return false. */ dsa_switch_for_each_cpu_port(dp, ds) - dp->master->dsa_ptr = NULL; + dp->conduit->dsa_ptr = NULL; rtnl_unlock(); out: @@ -1605,7 +1605,7 @@ EXPORT_SYMBOL_GPL(dsa_switch_shutdown); #ifdef CONFIG_PM_SLEEP static bool dsa_port_is_initialized(const struct dsa_port *dp) { - return dp->type == DSA_PORT_TYPE_USER && dp->slave; + return dp->type == DSA_PORT_TYPE_USER && dp->user; } int dsa_switch_suspend(struct dsa_switch *ds) @@ -1613,12 +1613,12 @@ int dsa_switch_suspend(struct dsa_switch *ds) struct dsa_port *dp; int ret = 0; - /* Suspend slave network devices */ + /* Suspend user network devices */ dsa_switch_for_each_port(dp, ds) { if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_suspend(dp->slave); + ret = dsa_user_suspend(dp->user); if (ret) return ret; } @@ -1641,12 +1641,12 @@ int dsa_switch_resume(struct dsa_switch *ds) if (ret) return ret; - /* Resume slave network devices */ + /* Resume user network devices */ dsa_switch_for_each_port(dp, ds) { if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_resume(dp->slave); + ret = dsa_user_resume(dp->user); if (ret) return ret; } @@ -1658,10 +1658,10 @@ EXPORT_SYMBOL_GPL(dsa_switch_resume); struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) { - if (!netdev || !dsa_slave_dev_check(netdev)) + if (!netdev || !dsa_user_dev_check(netdev)) return ERR_PTR(-ENODEV); - return dsa_slave_to_port(netdev); + return dsa_user_to_port(netdev); } EXPORT_SYMBOL_GPL(dsa_port_from_netdev); @@ -1726,7 +1726,7 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db); static const struct dsa_stubs __dsa_stubs = { - .master_hwtstamp_validate = __dsa_master_hwtstamp_validate, + .conduit_hwtstamp_validate = __dsa_conduit_hwtstamp_validate, }; static void dsa_register_stubs(void) @@ -1748,7 +1748,7 @@ static int __init dsa_init_module(void) if (!dsa_owq) return -ENOMEM; - rc = dsa_slave_register_notifier(); + rc = dsa_user_register_notifier(); if (rc) goto register_notifier_fail; @@ -1763,7 +1763,7 @@ static int __init dsa_init_module(void) return 0; netlink_register_fail: - dsa_slave_unregister_notifier(); + dsa_user_unregister_notifier(); dev_remove_pack(&dsa_pack_type); register_notifier_fail: destroy_workqueue(dsa_owq); @@ -1778,7 +1778,7 @@ static void __exit dsa_cleanup_module(void) rtnl_link_unregister(&dsa_link_ops); - dsa_slave_unregister_notifier(); + dsa_user_unregister_notifier(); dev_remove_pack(&dsa_pack_type); destroy_workqueue(dsa_owq); } diff --git a/net/dsa/dsa.h b/net/dsa/dsa.h index b7e17ae1094d..3cc7823e9ef3 100644 --- a/net/dsa/dsa.h +++ b/net/dsa/dsa.h @@ -21,16 +21,16 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree 
*dst, const struct net_device *lag_dev); -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); +struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst); int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, +void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up); +void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, bool up); -void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up); unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); void dsa_bridge_num_put(const struct net_device *bridge_dev, unsigned int bridge_num); diff --git a/net/dsa/master.c b/net/dsa/master.c deleted file mode 100644 index 6be89ab0cc01..000000000000 --- a/net/dsa/master.c +++ /dev/null @@ -1,475 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Handling of a master device, switching frames via its switch fabric CPU port - * - * Copyright (c) 2017 Savoir-faire Linux Inc. - * Vivien Didelot - */ - -#include -#include -#include -#include - -#include "dsa.h" -#include "master.h" -#include "port.h" -#include "tag.h" - -static int dsa_master_get_regs_len(struct net_device *dev) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - int port = cpu_dp->index; - int ret = 0; - int len; - - if (ops->get_regs_len) { - len = ops->get_regs_len(dev); - if (len < 0) - return len; - ret += len; - } - - ret += sizeof(struct ethtool_drvinfo); - ret += sizeof(struct ethtool_regs); - - if (ds->ops->get_regs_len) { - len = ds->ops->get_regs_len(ds, port); - if (len < 0) - return len; - ret += len; - } - - return ret; -} - -static void dsa_master_get_regs(struct net_device *dev, - struct ethtool_regs *regs, void *data) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - struct ethtool_drvinfo *cpu_info; - struct ethtool_regs *cpu_regs; - int port = cpu_dp->index; - int len; - - if (ops->get_regs_len && ops->get_regs) { - len = ops->get_regs_len(dev); - if (len < 0) - return; - regs->len = len; - ops->get_regs(dev, regs, data); - data += regs->len; - } - - cpu_info = (struct ethtool_drvinfo *)data; - strscpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); - data += sizeof(*cpu_info); - cpu_regs = (struct ethtool_regs *)data; - data += sizeof(*cpu_regs); - - if (ds->ops->get_regs_len && ds->ops->get_regs) { - len = ds->ops->get_regs_len(ds, port); - if (len < 0) - return; - cpu_regs->len = len; - ds->ops->get_regs(ds, port, cpu_regs, data); - } -} - -static void dsa_master_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - int port = cpu_dp->index; - int count = 0; - - if (ops->get_sset_count && ops->get_ethtool_stats) { - count = ops->get_sset_count(dev, ETH_SS_STATS); - ops->get_ethtool_stats(dev, stats, data); - } - - if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, port, data + count); -} - -static void 
dsa_master_get_ethtool_phy_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - int port = cpu_dp->index; - int count = 0; - - if (dev->phydev && !ops->get_ethtool_phy_stats) { - count = phy_ethtool_get_sset_count(dev->phydev); - if (count >= 0) - phy_ethtool_get_stats(dev->phydev, stats, data); - } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { - count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); - ops->get_ethtool_phy_stats(dev, stats, data); - } - - if (count < 0) - count = 0; - - if (ds->ops->get_ethtool_phy_stats) - ds->ops->get_ethtool_phy_stats(ds, port, data + count); -} - -static int dsa_master_get_sset_count(struct net_device *dev, int sset) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - int count = 0; - - if (sset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - count = phy_ethtool_get_sset_count(dev->phydev); - else if (ops->get_sset_count) - count = ops->get_sset_count(dev, sset); - - if (count < 0) - count = 0; - - if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, cpu_dp->index, sset); - - return count; -} - -static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, - uint8_t *data) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - struct dsa_switch *ds = cpu_dp->ds; - int port = cpu_dp->index; - int len = ETH_GSTRING_LEN; - int mcount = 0, count, i; - uint8_t pfx[4]; - uint8_t *ndata; - - snprintf(pfx, sizeof(pfx), "p%.2d", port); - /* We do not want to be NULL-terminated, since this is a prefix */ - pfx[sizeof(pfx) - 1] = '_'; - - if (stringset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) { - mcount = phy_ethtool_get_sset_count(dev->phydev); - if (mcount < 0) - mcount = 0; - else - phy_ethtool_get_strings(dev->phydev, data); - } else if (ops->get_sset_count && ops->get_strings) { - mcount = ops->get_sset_count(dev, stringset); - if (mcount < 0) - mcount = 0; - ops->get_strings(dev, stringset, data); - } - - if (ds->ops->get_strings) { - ndata = data + mcount * len; - /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle - * the output after to prepend our CPU port prefix we - * constructed earlier - */ - ds->ops->get_strings(ds, port, stringset, ndata); - count = ds->ops->get_sset_count(ds, port, stringset); - if (count < 0) - return; - for (i = 0; i < count; i++) { - memmove(ndata + (i * len + sizeof(pfx)), - ndata + i * len, len - sizeof(pfx)); - memcpy(ndata + i * len, pfx, sizeof(pfx)); - } - } -} - -/* Deny PTP operations on master if there is at least one switch in the tree - * that is PTP capable. 
- */ -int __dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch *ds = cpu_dp->ds; - struct dsa_switch_tree *dst; - struct dsa_port *dp; - - dst = ds->dst; - - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_supports_hwtstamp(dp)) { - NL_SET_ERR_MSG(extack, - "HW timestamping not allowed on DSA master when switch supports the operation"); - return -EBUSY; - } - } - - return 0; -} - -static int dsa_master_ethtool_setup(struct net_device *dev) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch *ds = cpu_dp->ds; - struct ethtool_ops *ops; - - if (netif_is_lag_master(dev)) - return 0; - - ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); - if (!ops) - return -ENOMEM; - - cpu_dp->orig_ethtool_ops = dev->ethtool_ops; - if (cpu_dp->orig_ethtool_ops) - memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); - - ops->get_regs_len = dsa_master_get_regs_len; - ops->get_regs = dsa_master_get_regs; - ops->get_sset_count = dsa_master_get_sset_count; - ops->get_ethtool_stats = dsa_master_get_ethtool_stats; - ops->get_strings = dsa_master_get_strings; - ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats; - - dev->ethtool_ops = ops; - - return 0; -} - -static void dsa_master_ethtool_teardown(struct net_device *dev) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - - if (netif_is_lag_master(dev)) - return; - - dev->ethtool_ops = cpu_dp->orig_ethtool_ops; - cpu_dp->orig_ethtool_ops = NULL; -} - -/* Keep the master always promiscuous if the tagging protocol requires that - * (garbles MAC DA) or if it doesn't support unicast filtering, case in which - * it would revert to promiscuous mode as soon as we call dev_uc_add() on it - * anyway. - */ -static void dsa_master_set_promiscuity(struct net_device *dev, int inc) -{ - const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; - - if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master) - return; - - ASSERT_RTNL(); - - dev_set_promiscuity(dev, inc); -} - -static ssize_t tagging_show(struct device *d, struct device_attribute *attr, - char *buf) -{ - struct net_device *dev = to_net_dev(d); - struct dsa_port *cpu_dp = dev->dsa_ptr; - - return sysfs_emit(buf, "%s\n", - dsa_tag_protocol_to_str(cpu_dp->tag_ops)); -} - -static ssize_t tagging_store(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) -{ - const struct dsa_device_ops *new_tag_ops, *old_tag_ops; - const char *end = strchrnul(buf, '\n'), *name; - struct net_device *dev = to_net_dev(d); - struct dsa_port *cpu_dp = dev->dsa_ptr; - size_t len = end - buf; - int err; - - /* Empty string passed */ - if (!len) - return -ENOPROTOOPT; - - name = kstrndup(buf, len, GFP_KERNEL); - if (!name) - return -ENOMEM; - - old_tag_ops = cpu_dp->tag_ops; - new_tag_ops = dsa_tag_driver_get_by_name(name); - kfree(name); - /* Bad tagger name? */ - if (IS_ERR(new_tag_ops)) - return PTR_ERR(new_tag_ops); - - if (new_tag_ops == old_tag_ops) - /* Drop the temporarily held duplicate reference, since - * the DSA switch tree uses this tagger. - */ - goto out; - - err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops, - old_tag_ops); - if (err) { - /* On failure the old tagger is restored, so we don't need the - * driver for the new one. 
- */ - dsa_tag_driver_put(new_tag_ops); - return err; - } - - /* On success we no longer need the module for the old tagging protocol - */ -out: - dsa_tag_driver_put(old_tag_ops); - return count; -} -static DEVICE_ATTR_RW(tagging); - -static struct attribute *dsa_slave_attrs[] = { - &dev_attr_tagging.attr, - NULL -}; - -static const struct attribute_group dsa_group = { - .name = "dsa", - .attrs = dsa_slave_attrs, -}; - -static void dsa_master_reset_mtu(struct net_device *dev) -{ - int err; - - err = dev_set_mtu(dev, ETH_DATA_LEN); - if (err) - netdev_dbg(dev, - "Unable to reset MTU to exclude DSA overheads\n"); -} - -int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) -{ - const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; - struct dsa_switch *ds = cpu_dp->ds; - struct device_link *consumer_link; - int mtu, ret; - - mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); - - /* The DSA master must use SET_NETDEV_DEV for this to work. */ - if (!netif_is_lag_master(dev)) { - consumer_link = device_link_add(ds->dev, dev->dev.parent, - DL_FLAG_AUTOREMOVE_CONSUMER); - if (!consumer_link) - netdev_err(dev, - "Failed to create a device link to DSA switch %s\n", - dev_name(ds->dev)); - } - - /* The switch driver may not implement ->port_change_mtu(), case in - * which dsa_slave_change_mtu() will not update the master MTU either, - * so we need to do that here. - */ - ret = dev_set_mtu(dev, mtu); - if (ret) - netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n", - ret, mtu); - - /* If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - - dev->dsa_ptr = cpu_dp; - - dsa_master_set_promiscuity(dev, 1); - - ret = dsa_master_ethtool_setup(dev); - if (ret) - goto out_err_reset_promisc; - - ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); - if (ret) - goto out_err_ethtool_teardown; - - return ret; - -out_err_ethtool_teardown: - dsa_master_ethtool_teardown(dev); -out_err_reset_promisc: - dsa_master_set_promiscuity(dev, -1); - return ret; -} - -void dsa_master_teardown(struct net_device *dev) -{ - sysfs_remove_group(&dev->dev.kobj, &dsa_group); - dsa_master_ethtool_teardown(dev); - dsa_master_reset_mtu(dev); - dsa_master_set_promiscuity(dev, -1); - - dev->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); -} - -int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack) -{ - bool master_setup = false; - int err; - - if (!netdev_uses_dsa(lag_dev)) { - err = dsa_master_setup(lag_dev, cpu_dp); - if (err) - return err; - - master_setup = true; - } - - err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); - if (err) { - NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG"); - goto out_master_teardown; - } - - return 0; - -out_master_teardown: - if (master_setup) - dsa_master_teardown(lag_dev); - return err; -} - -/* Tear down a master if there isn't any other user port on it, - * optionally also destroying LAG information. 
- */ -void dsa_master_lag_teardown(struct net_device *lag_dev, - struct dsa_port *cpu_dp) -{ - struct net_device *upper; - struct list_head *iter; - - dsa_port_lag_leave(cpu_dp, lag_dev); - - netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) - if (dsa_slave_dev_check(upper)) - return; - - dsa_master_teardown(lag_dev); -} diff --git a/net/dsa/master.h b/net/dsa/master.h deleted file mode 100644 index 76e39d3ec909..000000000000 --- a/net/dsa/master.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef __DSA_MASTER_H -#define __DSA_MASTER_H - -struct dsa_port; -struct net_device; -struct netdev_lag_upper_info; -struct netlink_ext_ack; - -int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); -void dsa_master_teardown(struct net_device *dev); -int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack); -void dsa_master_lag_teardown(struct net_device *lag_dev, - struct dsa_port *cpu_dp); -int __dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack); - -#endif diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c index bd4bbaf851de..f56f90a25b99 100644 --- a/net/dsa/netlink.c +++ b/net/dsa/netlink.c @@ -5,7 +5,7 @@ #include #include "netlink.h" -#include "slave.h" +#include "user.h" static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { [IFLA_DSA_MASTER] = { .type = NLA_U32 }, @@ -22,13 +22,13 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_DSA_MASTER]) { u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); - struct net_device *master; + struct net_device *conduit; - master = __dev_get_by_index(dev_net(dev), ifindex); - if (!master) + conduit = __dev_get_by_index(dev_net(dev), ifindex); + if (!conduit) return -EINVAL; - err = dsa_slave_change_master(dev, master, extack); + err = dsa_user_change_conduit(dev, conduit, extack); if (err) return err; } @@ -44,9 +44,9 @@ static size_t dsa_get_size(const struct net_device *dev) static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct net_device *master = dsa_slave_to_master(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); - if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex)) + if (nla_put_u32(skb, IFLA_DSA_MASTER, conduit->ifindex)) return -EMSGSIZE; return 0; diff --git a/net/dsa/port.c b/net/dsa/port.c index 6e0d000a97c4..c42dac87671b 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -14,9 +14,9 @@ #include "dsa.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag_8021q.h" +#include "user.h" /** * dsa_port_notify - Notify the switching fabric of changes to a port @@ -289,7 +289,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, } /* If the bridge was vlan_filtering, the bridge core doesn't trigger an - * event for changing vlan_filtering setting upon slave ports leaving + * event for changing vlan_filtering setting upon user ports leaving * it. That is a good thing, because that lets us handle it and also * handle the case where the switch's vlan_filtering setting is global * (not per port). 
When that happens, the correct moment to trigger the @@ -489,7 +489,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, .dp = dp, .extack = extack, }; - struct net_device *dev = dp->slave; + struct net_device *dev = dp->user; struct net_device *brport_dev; int err; @@ -514,8 +514,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, dp->bridge->tx_fwd_offload = info.tx_fwd_offload; err = switchdev_bridge_port_offload(brport_dev, dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier, + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier, dp->bridge->tx_fwd_offload, extack); if (err) goto out_rollback_unbridge; @@ -528,8 +528,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, out_rollback_unoffload: switchdev_bridge_port_unoffload(brport_dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier); + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier); dsa_flush_workqueue(); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); @@ -547,8 +547,8 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) return; switchdev_bridge_port_unoffload(brport_dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier); + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier); dsa_flush_workqueue(); } @@ -741,10 +741,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, */ if (vlan_filtering && dsa_port_is_user(dp)) { struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *upper_dev, *slave = dp->slave; + struct net_device *upper_dev, *user = dp->user; struct list_head *iter; - netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { + netdev_for_each_upper_dev_rcu(user, upper_dev, iter) { struct bridge_vlan_info br_info; u16 vid; @@ -803,9 +803,9 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, if (!ds->ops->port_vlan_filtering) return -EOPNOTSUPP; - /* We are called from dsa_slave_switchdev_blocking_event(), + /* We are called from dsa_user_switchdev_blocking_event(), * which is not under rcu_read_lock(), unlike - * dsa_slave_switchdev_event(). + * dsa_user_switchdev_event(). */ rcu_read_lock(); apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack); @@ -827,24 +827,24 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, ds->vlan_filtering = vlan_filtering; dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = other_dp->slave; + struct net_device *user = other_dp->user; /* We might be called in the unbind path, so not - * all slave devices might still be registered. + * all user devices might still be registered. 
*/ - if (!slave) + if (!user) continue; - err = dsa_slave_manage_vlan_filtering(slave, - vlan_filtering); + err = dsa_user_manage_vlan_filtering(user, + vlan_filtering); if (err) goto restore; } } else { dp->vlan_filtering = vlan_filtering; - err = dsa_slave_manage_vlan_filtering(dp->slave, - vlan_filtering); + err = dsa_user_manage_vlan_filtering(dp->user, + vlan_filtering); if (err) goto restore; } @@ -863,7 +863,7 @@ restore: } /* This enforces legacy behavior for switch drivers which assume they can't - * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0 + * receive VLAN configuration when joining a bridge with vlan_filtering=0 */ bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) { @@ -1047,7 +1047,7 @@ int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1057,12 +1057,12 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - /* Avoid a call to __dev_set_promiscuity() on the master, which + /* Avoid a call to __dev_set_promiscuity() on the conduit, which * requires rtnl_lock(), since we can't guarantee that is held here, * and we can't take it either. */ - if (master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_add(master, addr); + if (conduit->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(conduit, addr); if (err) return err; } @@ -1098,7 +1098,7 @@ int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1108,8 +1108,8 @@ int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - if (master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_del(master, addr); + if (conduit->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(conduit, addr); if (err) return err; } @@ -1229,7 +1229,7 @@ int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1239,7 +1239,7 @@ int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - err = dev_mc_add(master, mdb->addr); + err = dev_mc_add(conduit, mdb->addr); if (err) return err; @@ -1273,7 +1273,7 @@ int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1283,7 +1283,7 @@ int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - err = dev_mc_del(master, mdb->addr); + err = dev_mc_del(conduit, mdb->addr); if (err) return err; @@ -1318,7 +1318,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, const struct 
switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, @@ -1330,7 +1330,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_add(conduit, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1338,7 +1338,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, int dsa_port_host_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, @@ -1349,7 +1349,7 @@ int dsa_port_host_vlan_del(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_del(conduit, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1398,24 +1398,24 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp); } -static int dsa_port_assign_master(struct dsa_port *dp, - struct net_device *master, - struct netlink_ext_ack *extack, - bool fail_on_err) +static int dsa_port_assign_conduit(struct dsa_port *dp, + struct net_device *conduit, + struct netlink_ext_ack *extack, + bool fail_on_err) { struct dsa_switch *ds = dp->ds; int port = dp->index, err; - err = ds->ops->port_change_master(ds, port, master, extack); + err = ds->ops->port_change_conduit(ds, port, conduit, extack); if (err && !fail_on_err) - dev_err(ds->dev, "port %d failed to assign master %s: %pe\n", - port, master->name, ERR_PTR(err)); + dev_err(ds->dev, "port %d failed to assign conduit %s: %pe\n", + port, conduit->name, ERR_PTR(err)); if (err && fail_on_err) return err; - dp->cpu_dp = master->dsa_ptr; - dp->cpu_port_in_lag = netif_is_lag_master(master); + dp->cpu_dp = conduit->dsa_ptr; + dp->cpu_port_in_lag = netif_is_lag_master(conduit); return 0; } @@ -1428,12 +1428,12 @@ static int dsa_port_assign_master(struct dsa_port *dp, * the old CPU port before changing it, and restore it on errors during the * bringup of the new one. */ -int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, - struct netlink_ext_ack *extack) +int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit, + struct netlink_ext_ack *extack) { struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp); - struct net_device *old_master = dsa_port_to_master(dp); - struct net_device *dev = dp->slave; + struct net_device *old_conduit = dsa_port_to_conduit(dp); + struct net_device *dev = dp->user; struct dsa_switch *ds = dp->ds; bool vlan_filtering; int err, tmp; @@ -1454,7 +1454,7 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, */ vlan_filtering = dsa_port_is_vlan_filtering(dp); if (vlan_filtering) { - err = dsa_slave_manage_vlan_filtering(dev, false); + err = dsa_user_manage_vlan_filtering(dev, false); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to remove standalone VLANs"); @@ -1465,16 +1465,16 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, /* Standalone addresses, and addresses of upper interfaces like * VLAN, LAG, HSR need to be migrated. 
*/ - dsa_slave_unsync_ha(dev); + dsa_user_unsync_ha(dev); - err = dsa_port_assign_master(dp, master, extack, true); + err = dsa_port_assign_conduit(dp, conduit, extack, true); if (err) goto rewind_old_addrs; - dsa_slave_sync_ha(dev); + dsa_user_sync_ha(dev); if (vlan_filtering) { - err = dsa_slave_manage_vlan_filtering(dev, true); + err = dsa_user_manage_vlan_filtering(dev, true); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to restore standalone VLANs"); @@ -1495,19 +1495,19 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, rewind_new_vlan: if (vlan_filtering) - dsa_slave_manage_vlan_filtering(dev, false); + dsa_user_manage_vlan_filtering(dev, false); rewind_new_addrs: - dsa_slave_unsync_ha(dev); + dsa_user_unsync_ha(dev); - dsa_port_assign_master(dp, old_master, NULL, false); + dsa_port_assign_conduit(dp, old_conduit, NULL, false); /* Restore the objects on the old CPU port */ rewind_old_addrs: - dsa_slave_sync_ha(dev); + dsa_user_sync_ha(dev); if (vlan_filtering) { - tmp = dsa_slave_manage_vlan_filtering(dev, true); + tmp = dsa_user_manage_vlan_filtering(dev, true); if (tmp) { dev_err(ds->dev, "port %d failed to restore standalone VLANs: %pe\n", @@ -1620,7 +1620,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, struct dsa_switch *ds = dp->ds; if (dsa_port_is_user(dp)) - phydev = dp->slave->phydev; + phydev = dp->user->phydev; if (!ds->ops->phylink_mac_link_down) { if (ds->ops->adjust_link && phydev) @@ -1808,7 +1808,7 @@ err_phy_connect: * their type. * * User ports with no phy-handle or fixed-link are expected to connect to an - * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to + * internal PHY located on the ds->user_mii_bus at an MDIO address equal to * the port number. This description is still actively supported. * * Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to @@ -1829,7 +1829,7 @@ err_phy_connect: * a fixed-link, a phy-handle, or a managed = "in-band-status" property. * It becomes the responsibility of the driver to ensure that these ports * operate at the maximum speed (whatever this means) and will interoperate - * with the DSA master or other cascade port, since phylink methods will not be + * with the DSA conduit or other cascade port, since phylink methods will not be * invoked for them. 
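
Editor's note: the dsa_port_change_conduit() hunks above follow a save/rewind pattern: standalone VLANs and host addresses are torn down on the old conduit, the new conduit is assigned, each step is replayed on the new conduit, and on failure everything is undone in reverse order via goto labels so the old conduit ends up fully restored. The fragment below is only an illustrative, self-contained sketch of that unwind idiom; all names (change_conduit, manage_vlans, sync_addrs, assign_conduit) are hypothetical stand-ins, not DSA API.

/* Standalone illustration (not DSA code) of the goto-based rewind
 * structure used by dsa_port_change_conduit().  Build with: cc demo.c
 */
#include <stdio.h>

static int vlans_installed = 1;
static int addrs_synced = 1;

static int manage_vlans(int enable)  { vlans_installed = enable; return 0; }
static void sync_addrs(void)         { addrs_synced = 1; }
static void unsync_addrs(void)       { addrs_synced = 0; }
/* Hypothetical: fail when asked to switch to conduit 2, to exercise rewind. */
static int assign_conduit(int which) { return which == 2 ? -1 : 0; }

static int change_conduit(int new_c, int old_c)
{
	int err;

	err = manage_vlans(0);		/* tear down standalone VLANs */
	if (err)
		return err;

	unsync_addrs();			/* drop host addresses from old conduit */

	err = assign_conduit(new_c);
	if (err)
		goto rewind_old_addrs;

	sync_addrs();			/* replay host addresses on new conduit */
	err = manage_vlans(1);		/* replay standalone VLANs */
	if (err)
		goto rewind_new_addrs;

	return 0;

rewind_new_addrs:
	unsync_addrs();
	assign_conduit(old_c);
rewind_old_addrs:
	sync_addrs();			/* restore state on the old conduit */
	manage_vlans(1);
	return err;
}

int main(void)
{
	printf("switch to conduit 2: %d (vlans=%d addrs=%d)\n",
	       change_conduit(2, 1), vlans_installed, addrs_synced);
	return 0;
}

In the failing case the sketch prints -1 with vlans=1 and addrs=1, i.e. the caller observes an error but the original configuration is intact, which is exactly the guarantee the comment above dsa_port_change_conduit() describes.
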
* * If you are considering expanding this table for newly introduced switches, diff --git a/net/dsa/port.h b/net/dsa/port.h index 334879964e2c..6bc3291573c0 100644 --- a/net/dsa/port.h +++ b/net/dsa/port.h @@ -109,7 +109,7 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); -int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, - struct netlink_ext_ack *extack); +int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit, + struct netlink_ext_ack *extack); #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c deleted file mode 100644 index 4c3e502d7e16..000000000000 --- a/net/dsa/slave.c +++ /dev/null @@ -1,3727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dsa/slave.c - Slave device handling - * Copyright (c) 2008-2009 Marvell Semiconductor - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dsa.h" -#include "port.h" -#include "master.h" -#include "netlink.h" -#include "slave.h" -#include "switch.h" -#include "tag.h" - -struct dsa_switchdev_event_work { - struct net_device *dev; - struct net_device *orig_dev; - struct work_struct work; - unsigned long event; - /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and - * SWITCHDEV_FDB_DEL_TO_DEVICE - */ - unsigned char addr[ETH_ALEN]; - u16 vid; - bool host_addr; -}; - -enum dsa_standalone_event { - DSA_UC_ADD, - DSA_UC_DEL, - DSA_MC_ADD, - DSA_MC_DEL, -}; - -struct dsa_standalone_event_work { - struct work_struct work; - struct net_device *dev; - enum dsa_standalone_event event; - unsigned char addr[ETH_ALEN]; - u16 vid; -}; - -struct dsa_host_vlan_rx_filtering_ctx { - struct net_device *dev; - const unsigned char *addr; - enum dsa_standalone_event event; -}; - -static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) -{ - return ds->ops->port_fdb_add && ds->ops->port_fdb_del && - ds->fdb_isolation && !ds->vlan_filtering_is_global && - !ds->needs_standalone_vlan_filtering; -} - -static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) -{ - return ds->ops->port_mdb_add && ds->ops->port_mdb_del && - ds->fdb_isolation && !ds->vlan_filtering_is_global && - !ds->needs_standalone_vlan_filtering; -} - -static void dsa_slave_standalone_event_work(struct work_struct *work) -{ - struct dsa_standalone_event_work *standalone_work = - container_of(work, struct dsa_standalone_event_work, work); - const unsigned char *addr = standalone_work->addr; - struct net_device *dev = standalone_work->dev; - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_mdb mdb; - struct dsa_switch *ds = dp->ds; - u16 vid = standalone_work->vid; - int err; - - switch (standalone_work->event) { - case DSA_UC_ADD: - err = dsa_port_standalone_host_fdb_add(dp, addr, vid); - if (err) { - dev_err(ds->dev, - "port %d failed to add %pM vid %d to fdb: %d\n", - dp->index, addr, vid, err); - break; - } - break; - - case DSA_UC_DEL: - err = dsa_port_standalone_host_fdb_del(dp, addr, vid); - if (err) { - dev_err(ds->dev, - "port %d failed to delete %pM vid %d from fdb: %d\n", - dp->index, addr, vid, err); - } - - break; - case DSA_MC_ADD: - ether_addr_copy(mdb.addr, addr); - mdb.vid = vid; - - err = 
dsa_port_standalone_host_mdb_add(dp, &mdb); - if (err) { - dev_err(ds->dev, - "port %d failed to add %pM vid %d to mdb: %d\n", - dp->index, addr, vid, err); - break; - } - break; - case DSA_MC_DEL: - ether_addr_copy(mdb.addr, addr); - mdb.vid = vid; - - err = dsa_port_standalone_host_mdb_del(dp, &mdb); - if (err) { - dev_err(ds->dev, - "port %d failed to delete %pM vid %d from mdb: %d\n", - dp->index, addr, vid, err); - } - - break; - } - - kfree(standalone_work); -} - -static int dsa_slave_schedule_standalone_work(struct net_device *dev, - enum dsa_standalone_event event, - const unsigned char *addr, - u16 vid) -{ - struct dsa_standalone_event_work *standalone_work; - - standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC); - if (!standalone_work) - return -ENOMEM; - - INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work); - standalone_work->event = event; - standalone_work->dev = dev; - - ether_addr_copy(standalone_work->addr, addr); - standalone_work->vid = vid; - - dsa_schedule_work(&standalone_work->work); - - return 0; -} - -static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid) -{ - struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; - - return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event, - ctx->addr, vid); -} - -static int dsa_slave_vlan_for_each(struct net_device *dev, - int (*cb)(void *arg, int vid), void *arg) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_vlan *v; - int err; - - lockdep_assert_held(&dev->addr_list_lock); - - err = cb(arg, 0); - if (err) - return err; - - list_for_each_entry(v, &dp->user_vlans, list) { - err = cb(arg, v->vid); - if (err) - return err; - } - - return 0; -} - -static int dsa_slave_sync_uc(struct net_device *dev, - const unsigned char *addr) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_host_vlan_rx_filtering_ctx ctx = { - .dev = dev, - .addr = addr, - .event = DSA_UC_ADD, - }; - - dev_uc_add(master, addr); - - if (!dsa_switch_supports_uc_filtering(dp->ds)) - return 0; - - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, - &ctx); -} - -static int dsa_slave_unsync_uc(struct net_device *dev, - const unsigned char *addr) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_host_vlan_rx_filtering_ctx ctx = { - .dev = dev, - .addr = addr, - .event = DSA_UC_DEL, - }; - - dev_uc_del(master, addr); - - if (!dsa_switch_supports_uc_filtering(dp->ds)) - return 0; - - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, - &ctx); -} - -static int dsa_slave_sync_mc(struct net_device *dev, - const unsigned char *addr) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_host_vlan_rx_filtering_ctx ctx = { - .dev = dev, - .addr = addr, - .event = DSA_MC_ADD, - }; - - dev_mc_add(master, addr); - - if (!dsa_switch_supports_mc_filtering(dp->ds)) - return 0; - - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, - &ctx); -} - -static int dsa_slave_unsync_mc(struct net_device *dev, - const unsigned char *addr) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_host_vlan_rx_filtering_ctx ctx = { - .dev = dev, - .addr = addr, - .event = DSA_MC_DEL, - }; - - dev_mc_del(master, addr); - - if (!dsa_switch_supports_mc_filtering(dp->ds)) - return 0; - - return dsa_slave_vlan_for_each(dev, 
dsa_slave_host_vlan_rx_filtering, - &ctx); -} - -void dsa_slave_sync_ha(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct netdev_hw_addr *ha; - - netif_addr_lock_bh(dev); - - netdev_for_each_synced_mc_addr(ha, dev) - dsa_slave_sync_mc(dev, ha->addr); - - netdev_for_each_synced_uc_addr(ha, dev) - dsa_slave_sync_uc(dev, ha->addr); - - netif_addr_unlock_bh(dev); - - if (dsa_switch_supports_uc_filtering(ds) || - dsa_switch_supports_mc_filtering(ds)) - dsa_flush_workqueue(); -} - -void dsa_slave_unsync_ha(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct netdev_hw_addr *ha; - - netif_addr_lock_bh(dev); - - netdev_for_each_synced_uc_addr(ha, dev) - dsa_slave_unsync_uc(dev, ha->addr); - - netdev_for_each_synced_mc_addr(ha, dev) - dsa_slave_unsync_mc(dev, ha->addr); - - netif_addr_unlock_bh(dev); - - if (dsa_switch_supports_uc_filtering(ds) || - dsa_switch_supports_mc_filtering(ds)) - dsa_flush_workqueue(); -} - -/* slave mii_bus handling ***************************************************/ -static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) -{ - struct dsa_switch *ds = bus->priv; - - if (ds->phys_mii_mask & (1 << addr)) - return ds->ops->phy_read(ds, addr, reg); - - return 0xffff; -} - -static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) -{ - struct dsa_switch *ds = bus->priv; - - if (ds->phys_mii_mask & (1 << addr)) - return ds->ops->phy_write(ds, addr, reg, val); - - return 0; -} - -void dsa_slave_mii_bus_init(struct dsa_switch *ds) -{ - ds->slave_mii_bus->priv = (void *)ds; - ds->slave_mii_bus->name = "dsa slave smi"; - ds->slave_mii_bus->read = dsa_slave_phy_read; - ds->slave_mii_bus->write = dsa_slave_phy_write; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", - ds->dst->index, ds->index); - ds->slave_mii_bus->parent = ds->dev; - ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; -} - - -/* slave device handling ****************************************************/ -static int dsa_slave_get_iflink(const struct net_device *dev) -{ - return dsa_slave_to_master(dev)->ifindex; -} - -static int dsa_slave_open(struct net_device *dev) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int err; - - err = dev_open(master, NULL); - if (err < 0) { - netdev_err(dev, "failed to open master %s\n", master->name); - goto out; - } - - if (dsa_switch_supports_uc_filtering(ds)) { - err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0); - if (err) - goto out; - } - - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { - err = dev_uc_add(master, dev->dev_addr); - if (err < 0) - goto del_host_addr; - } - - err = dsa_port_enable_rt(dp, dev->phydev); - if (err) - goto del_unicast; - - return 0; - -del_unicast: - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); -del_host_addr: - if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); -out: - return err; -} - -static int dsa_slave_close(struct net_device *dev) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - dsa_port_disable_rt(dp); - - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); - - if (dsa_switch_supports_uc_filtering(ds)) - 
dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); - - return 0; -} - -static void dsa_slave_manage_host_flood(struct net_device *dev) -{ - bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI); - struct dsa_port *dp = dsa_slave_to_port(dev); - bool uc = dev->flags & IFF_PROMISC; - - dsa_port_set_host_flood(dp, uc, mc); -} - -static void dsa_slave_change_rx_flags(struct net_device *dev, int change) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, - dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, - dev->flags & IFF_PROMISC ? 1 : -1); - - if (dsa_switch_supports_uc_filtering(ds) && - dsa_switch_supports_mc_filtering(ds)) - dsa_slave_manage_host_flood(dev); -} - -static void dsa_slave_set_rx_mode(struct net_device *dev) -{ - __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc); - __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc); -} - -static int dsa_slave_set_mac_address(struct net_device *dev, void *a) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct sockaddr *addr = a; - int err; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - if (ds->ops->port_set_mac_address) { - err = ds->ops->port_set_mac_address(ds, dp->index, - addr->sa_data); - if (err) - return err; - } - - /* If the port is down, the address isn't synced yet to hardware or - * to the DSA master, so there is nothing to change. - */ - if (!(dev->flags & IFF_UP)) - goto out_change_dev_addr; - - if (dsa_switch_supports_uc_filtering(ds)) { - err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0); - if (err) - return err; - } - - if (!ether_addr_equal(addr->sa_data, master->dev_addr)) { - err = dev_uc_add(master, addr->sa_data); - if (err < 0) - goto del_unicast; - } - - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); - - if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); - -out_change_dev_addr: - eth_hw_addr_set(dev, addr->sa_data); - - return 0; - -del_unicast: - if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0); - - return err; -} - -struct dsa_slave_dump_ctx { - struct net_device *dev; - struct sk_buff *skb; - struct netlink_callback *cb; - int idx; -}; - -static int -dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid, - bool is_static, void *data) -{ - struct dsa_slave_dump_ctx *dump = data; - u32 portid = NETLINK_CB(dump->cb->skb).portid; - u32 seq = dump->cb->nlh->nlmsg_seq; - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - if (dump->idx < dump->cb->args[2]) - goto skip; - - nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, - sizeof(*ndm), NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_SELF; - ndm->ndm_type = 0; - ndm->ndm_ifindex = dump->dev->ifindex; - ndm->ndm_state = is_static ? 
NUD_NOARP : NUD_REACHABLE; - - if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr)) - goto nla_put_failure; - - if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid)) - goto nla_put_failure; - - nlmsg_end(dump->skb, nlh); - -skip: - dump->idx++; - return 0; - -nla_put_failure: - nlmsg_cancel(dump->skb, nlh); - return -EMSGSIZE; -} - -static int -dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *filter_dev, - int *idx) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_dump_ctx dump = { - .dev = dev, - .skb = skb, - .cb = cb, - .idx = *idx, - }; - int err; - - err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); - *idx = dump.idx; - - return err; -} - -static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - int port = p->dp->index; - - /* Pass through to switch driver if it supports timestamping */ - switch (cmd) { - case SIOCGHWTSTAMP: - if (ds->ops->port_hwtstamp_get) - return ds->ops->port_hwtstamp_get(ds, port, ifr); - break; - case SIOCSHWTSTAMP: - if (ds->ops->port_hwtstamp_set) - return ds->ops->port_hwtstamp_set(ds, port, ifr); - break; - } - - return phylink_mii_ioctl(p->dp->pl, ifr, cmd); -} - -static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, - const struct switchdev_attr *attr, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - int ret; - - if (ctx && ctx != dp) - return 0; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_set_state(dp, attr->u.stp_state, true); - break; - case SWITCHDEV_ATTR_ID_PORT_MST_STATE: - if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack); - break; - case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, - extack); - break; - case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_ageing_time(dp, attr->u.ageing_time); - break; - case SWITCHDEV_ATTR_ID_BRIDGE_MST: - if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_mst_enable(dp, attr->u.mst, extack); - break; - case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: - if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, - extack); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); - break; - case SWITCHDEV_ATTR_ID_VLAN_MSTI: - if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - return ret; -} - -/* Must be called under rcu_read_lock() */ -static int -dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, - const struct switchdev_obj_port_vlan *vlan) -{ - struct net_device *upper_dev; - struct list_head *iter; - - netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { - u16 vid; - - if (!is_vlan_dev(upper_dev)) - continue; 
- - vid = vlan_dev_vlan_id(upper_dev); - if (vid == vlan->vid) - return -EBUSY; - } - - return 0; -} - -static int dsa_slave_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan *vlan; - int err; - - if (dsa_port_skip_vlan_configuration(dp)) { - NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); - return 0; - } - - vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - - /* Deny adding a bridge VLAN when there is already an 802.1Q upper with - * the same VID. - */ - if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) { - rcu_read_lock(); - err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan); - rcu_read_unlock(); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Port already has a VLAN upper with this VID"); - return err; - } - } - - return dsa_port_vlan_add(dp, vlan, extack); -} - -/* Offload a VLAN installed on the bridge or on a foreign interface by - * installing it as a VLAN towards the CPU port. - */ -static int dsa_slave_host_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan; - - /* Do nothing if this is a software bridge */ - if (!dp->bridge) - return -EOPNOTSUPP; - - if (dsa_port_skip_vlan_configuration(dp)) { - NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); - return 0; - } - - vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); - - /* Even though drivers often handle CPU membership in special ways, - * it doesn't make sense to program a PVID, so clear this flag. - */ - vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; - - return dsa_port_host_vlan_add(dp, &vlan, extack); -} - -static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - int err; - - if (ctx && ctx != dp) - return 0; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); - break; - case SWITCHDEV_OBJ_ID_HOST_MDB: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); - break; - case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - err = dsa_slave_vlan_add(dev, obj, extack); - else - err = dsa_slave_host_vlan_add(dev, obj, extack); - break; - case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj)); - break; - case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mrp_add_ring_role(dp, - SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int dsa_slave_vlan_del(struct net_device *dev, - const struct switchdev_obj *obj) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan *vlan; - - if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - - return dsa_port_vlan_del(dp, vlan); -} - -static int dsa_slave_host_vlan_del(struct net_device *dev, - const struct switchdev_obj *obj) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct 
switchdev_obj_port_vlan *vlan; - - /* Do nothing if this is a software bridge */ - if (!dp->bridge) - return -EOPNOTSUPP; - - if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - - return dsa_port_host_vlan_del(dp, vlan); -} - -static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, - const struct switchdev_obj *obj) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - int err; - - if (ctx && ctx != dp) - return 0; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); - break; - case SWITCHDEV_OBJ_ID_HOST_MDB: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); - break; - case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - err = dsa_slave_vlan_del(dev, obj); - else - err = dsa_slave_host_vlan_del(dev, obj); - break; - case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj)); - break; - case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - - err = dsa_port_mrp_del_ring_role(dp, - SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, - struct sk_buff *skb) -{ -#ifdef CONFIG_NET_POLL_CONTROLLER - struct dsa_slave_priv *p = netdev_priv(dev); - - return netpoll_send_skb(p->netpoll, skb); -#else - BUG(); - return NETDEV_TX_OK; -#endif -} - -static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, - struct sk_buff *skb) -{ - struct dsa_switch *ds = p->dp->ds; - - if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - return; - - if (!ds->ops->port_txtstamp) - return; - - ds->ops->port_txtstamp(ds, p->dp->index, skb); -} - -netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) -{ - /* SKB for netpoll still need to be mangled with the protocol-specific - * tag to be successfully transmitted - */ - if (unlikely(netpoll_tx_running(dev))) - return dsa_slave_netpoll_send_skb(dev, skb); - - /* Queue the SKB for transmission on the parent interface, but - * do not modify its EtherType - */ - skb->dev = dsa_slave_to_master(dev); - dev_queue_xmit(skb); - - return NETDEV_TX_OK; -} -EXPORT_SYMBOL_GPL(dsa_enqueue_skb); - -static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) -{ - int needed_headroom = dev->needed_headroom; - int needed_tailroom = dev->needed_tailroom; - - /* For tail taggers, we need to pad short frames ourselves, to ensure - * that the tail tag does not fail at its role of being at the end of - * the packet, once the master interface pads the frame. Account for - * that pad length here, and pad later. - */ - if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) - needed_tailroom += ETH_ZLEN - skb->len; - /* skb_headroom() returns unsigned int... */ - needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); - needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); - - if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) - /* No reallocation needed, yay! 
*/ - return 0; - - return pskb_expand_head(skb, needed_headroom, needed_tailroom, - GFP_ATOMIC); -} - -static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct sk_buff *nskb; - - dev_sw_netstats_tx_add(dev, 1, skb->len); - - memset(skb->cb, 0, sizeof(skb->cb)); - - /* Handle tx timestamp if any */ - dsa_skb_tx_timestamp(p, skb); - - if (dsa_realloc_skb(skb, dev)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - /* needed_tailroom should still be 'warm' in the cache line from - * dsa_realloc_skb(), which has also ensured that padding is safe. - */ - if (dev->needed_tailroom) - eth_skb_pad(skb); - - /* Transmit function may have to reallocate the original SKB, - * in which case it must have freed it. Only free it here on error. - */ - nskb = p->xmit(skb, dev); - if (!nskb) { - kfree_skb(skb); - return NETDEV_TX_OK; - } - - return dsa_enqueue_skb(nskb, dev); -} - -/* ethtool operations *******************************************************/ - -static void dsa_slave_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) -{ - strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); - strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); - strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); -} - -static int dsa_slave_get_regs_len(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_regs_len) - return ds->ops->get_regs_len(ds, dp->index); - - return -EOPNOTSUPP; -} - -static void -dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_regs) - ds->ops->get_regs(ds, dp->index, regs, _p); -} - -static int dsa_slave_nway_reset(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return phylink_ethtool_nway_reset(dp->pl); -} - -static int dsa_slave_get_eeprom_len(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->cd && ds->cd->eeprom_len) - return ds->cd->eeprom_len; - - if (ds->ops->get_eeprom_len) - return ds->ops->get_eeprom_len(ds); - - return 0; -} - -static int dsa_slave_get_eeprom(struct net_device *dev, - struct ethtool_eeprom *eeprom, u8 *data) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_eeprom) - return ds->ops->get_eeprom(ds, eeprom, data); - - return -EOPNOTSUPP; -} - -static int dsa_slave_set_eeprom(struct net_device *dev, - struct ethtool_eeprom *eeprom, u8 *data) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->set_eeprom) - return ds->ops->set_eeprom(ds, eeprom, data); - - return -EOPNOTSUPP; -} - -static void dsa_slave_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (stringset == ETH_SS_STATS) { - int len = ETH_GSTRING_LEN; - - strscpy_pad(data, "tx_packets", len); - strscpy_pad(data + len, "tx_bytes", len); - strscpy_pad(data + 2 * len, "rx_packets", len); - strscpy_pad(data + 3 * len, "rx_bytes", len); - if (ds->ops->get_strings) - ds->ops->get_strings(ds, dp->index, stringset, - data + 4 * len); - } else if (stringset == ETH_SS_TEST) { - net_selftest_get_strings(data); - } - -} - -static void 
dsa_slave_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct pcpu_sw_netstats *s; - unsigned int start; - int i; - - for_each_possible_cpu(i) { - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; - - s = per_cpu_ptr(dev->tstats, i); - do { - start = u64_stats_fetch_begin(&s->syncp); - tx_packets = u64_stats_read(&s->tx_packets); - tx_bytes = u64_stats_read(&s->tx_bytes); - rx_packets = u64_stats_read(&s->rx_packets); - rx_bytes = u64_stats_read(&s->rx_bytes); - } while (u64_stats_fetch_retry(&s->syncp, start)); - data[0] += tx_packets; - data[1] += tx_bytes; - data[2] += rx_packets; - data[3] += rx_bytes; - } - if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, dp->index, data + 4); -} - -static int dsa_slave_get_sset_count(struct net_device *dev, int sset) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (sset == ETH_SS_STATS) { - int count = 0; - - if (ds->ops->get_sset_count) { - count = ds->ops->get_sset_count(ds, dp->index, sset); - if (count < 0) - return count; - } - - return count + 4; - } else if (sset == ETH_SS_TEST) { - return net_selftest_get_count(); - } - - return -EOPNOTSUPP; -} - -static void dsa_slave_get_eth_phy_stats(struct net_device *dev, - struct ethtool_eth_phy_stats *phy_stats) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_eth_phy_stats) - ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); -} - -static void dsa_slave_get_eth_mac_stats(struct net_device *dev, - struct ethtool_eth_mac_stats *mac_stats) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_eth_mac_stats) - ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); -} - -static void -dsa_slave_get_eth_ctrl_stats(struct net_device *dev, - struct ethtool_eth_ctrl_stats *ctrl_stats) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_eth_ctrl_stats) - ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); -} - -static void -dsa_slave_get_rmon_stats(struct net_device *dev, - struct ethtool_rmon_stats *rmon_stats, - const struct ethtool_rmon_hist_range **ranges) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_rmon_stats) - ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); -} - -static void dsa_slave_net_selftest(struct net_device *ndev, - struct ethtool_test *etest, u64 *buf) -{ - struct dsa_port *dp = dsa_slave_to_port(ndev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->self_test) { - ds->ops->self_test(ds, dp->index, etest, buf); - return; - } - - net_selftest(ndev, etest, buf); -} - -static int dsa_slave_get_mm(struct net_device *dev, - struct ethtool_mm_state *state) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->get_mm) - return -EOPNOTSUPP; - - return ds->ops->get_mm(ds, dp->index, state); -} - -static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->set_mm) - return -EOPNOTSUPP; - - return ds->ops->set_mm(ds, dp->index, cfg, extack); -} - -static void dsa_slave_get_mm_stats(struct net_device *dev, - struct ethtool_mm_stats *stats) -{ - struct dsa_port *dp = 
dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_mm_stats) - ds->ops->get_mm_stats(ds, dp->index, stats); -} - -static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - phylink_ethtool_get_wol(dp->pl, w); - - if (ds->ops->get_wol) - ds->ops->get_wol(ds, dp->index, w); -} - -static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int ret = -EOPNOTSUPP; - - phylink_ethtool_set_wol(dp->pl, w); - - if (ds->ops->set_wol) - ret = ds->ops->set_wol(ds, dp->index, w); - - return ret; -} - -static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int ret; - - /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev || !dp->pl) - return -ENODEV; - - if (!ds->ops->set_mac_eee) - return -EOPNOTSUPP; - - ret = ds->ops->set_mac_eee(ds, dp->index, e); - if (ret) - return ret; - - return phylink_ethtool_set_eee(dp->pl, e); -} - -static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int ret; - - /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev || !dp->pl) - return -ENODEV; - - if (!ds->ops->get_mac_eee) - return -EOPNOTSUPP; - - ret = ds->ops->get_mac_eee(ds, dp->index, e); - if (ret) - return ret; - - return phylink_ethtool_get_eee(dp->pl, e); -} - -static int dsa_slave_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return phylink_ethtool_ksettings_get(dp->pl, cmd); -} - -static int dsa_slave_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return phylink_ethtool_ksettings_set(dp->pl, cmd); -} - -static void dsa_slave_get_pause_stats(struct net_device *dev, - struct ethtool_pause_stats *pause_stats) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_pause_stats) - ds->ops->get_pause_stats(ds, dp->index, pause_stats); -} - -static void dsa_slave_get_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pause) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - phylink_ethtool_get_pauseparam(dp->pl, pause); -} - -static int dsa_slave_set_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pause) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return phylink_ethtool_set_pauseparam(dp->pl, pause); -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -static int dsa_slave_netpoll_setup(struct net_device *dev, - struct netpoll_info *ni) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_slave_priv *p = netdev_priv(dev); - struct netpoll *netpoll; - int err = 0; - - netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); - if (!netpoll) - return -ENOMEM; - - err = __netpoll_setup(netpoll, master); - if (err) { - kfree(netpoll); - goto out; - } - - p->netpoll = netpoll; -out: - return err; -} - -static void dsa_slave_netpoll_cleanup(struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct netpoll *netpoll = p->netpoll; - - if (!netpoll) - return; - - p->netpoll = NULL; - - __netpoll_free(netpoll); -} - -static void 
dsa_slave_poll_controller(struct net_device *dev) -{ -} -#endif - -static struct dsa_mall_tc_entry * -dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_mall_tc_entry *mall_tc_entry; - - list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) - if (mall_tc_entry->cookie == cookie) - return mall_tc_entry; - - return NULL; -} - -static int -dsa_slave_add_cls_matchall_mirred(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) -{ - struct netlink_ext_ack *extack = cls->common.extack; - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_mall_mirror_tc_entry *mirror; - struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = dp->ds; - struct flow_action_entry *act; - struct dsa_port *to_dp; - int err; - - if (!ds->ops->port_mirror_add) - return -EOPNOTSUPP; - - if (!flow_action_basic_hw_stats_check(&cls->rule->action, - cls->common.extack)) - return -EOPNOTSUPP; - - act = &cls->rule->action.entries[0]; - - if (!act->dev) - return -EINVAL; - - if (!dsa_slave_dev_check(act->dev)) - return -EOPNOTSUPP; - - mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); - if (!mall_tc_entry) - return -ENOMEM; - - mall_tc_entry->cookie = cls->cookie; - mall_tc_entry->type = DSA_PORT_MALL_MIRROR; - mirror = &mall_tc_entry->mirror; - - to_dp = dsa_slave_to_port(act->dev); - - mirror->to_local_port = to_dp->index; - mirror->ingress = ingress; - - err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack); - if (err) { - kfree(mall_tc_entry); - return err; - } - - list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); - - return err; -} - -static int -dsa_slave_add_cls_matchall_police(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) -{ - struct netlink_ext_ack *extack = cls->common.extack; - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_mall_policer_tc_entry *policer; - struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = dp->ds; - struct flow_action_entry *act; - int err; - - if (!ds->ops->port_policer_add) { - NL_SET_ERR_MSG_MOD(extack, - "Policing offload not implemented"); - return -EOPNOTSUPP; - } - - if (!ingress) { - NL_SET_ERR_MSG_MOD(extack, - "Only supported on ingress qdisc"); - return -EOPNOTSUPP; - } - - if (!flow_action_basic_hw_stats_check(&cls->rule->action, - cls->common.extack)) - return -EOPNOTSUPP; - - list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { - if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) { - NL_SET_ERR_MSG_MOD(extack, - "Only one port policer allowed"); - return -EEXIST; - } - } - - act = &cls->rule->action.entries[0]; - - mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); - if (!mall_tc_entry) - return -ENOMEM; - - mall_tc_entry->cookie = cls->cookie; - mall_tc_entry->type = DSA_PORT_MALL_POLICER; - policer = &mall_tc_entry->policer; - policer->rate_bytes_per_sec = act->police.rate_bytes_ps; - policer->burst = act->police.burst; - - err = ds->ops->port_policer_add(ds, dp->index, policer); - if (err) { - kfree(mall_tc_entry); - return err; - } - - list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); - - return err; -} - -static int dsa_slave_add_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) -{ - int err = -EOPNOTSUPP; - - if (cls->common.protocol == htons(ETH_P_ALL) && - 
flow_offload_has_one_action(&cls->rule->action) && - cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED) - err = dsa_slave_add_cls_matchall_mirred(dev, cls, ingress); - else if (flow_offload_has_one_action(&cls->rule->action) && - cls->rule->action.entries[0].id == FLOW_ACTION_POLICE) - err = dsa_slave_add_cls_matchall_police(dev, cls, ingress); - - return err; -} - -static void dsa_slave_del_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = dp->ds; - - mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); - if (!mall_tc_entry) - return; - - list_del(&mall_tc_entry->list); - - switch (mall_tc_entry->type) { - case DSA_PORT_MALL_MIRROR: - if (ds->ops->port_mirror_del) - ds->ops->port_mirror_del(ds, dp->index, - &mall_tc_entry->mirror); - break; - case DSA_PORT_MALL_POLICER: - if (ds->ops->port_policer_del) - ds->ops->port_policer_del(ds, dp->index); - break; - default: - WARN_ON(1); - } - - kfree(mall_tc_entry); -} - -static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) -{ - if (cls->common.chain_index) - return -EOPNOTSUPP; - - switch (cls->command) { - case TC_CLSMATCHALL_REPLACE: - return dsa_slave_add_cls_matchall(dev, cls, ingress); - case TC_CLSMATCHALL_DESTROY: - dsa_slave_del_cls_matchall(dev, cls); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static int dsa_slave_add_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int port = dp->index; - - if (!ds->ops->cls_flower_add) - return -EOPNOTSUPP; - - return ds->ops->cls_flower_add(ds, port, cls, ingress); -} - -static int dsa_slave_del_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int port = dp->index; - - if (!ds->ops->cls_flower_del) - return -EOPNOTSUPP; - - return ds->ops->cls_flower_del(ds, port, cls, ingress); -} - -static int dsa_slave_stats_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int port = dp->index; - - if (!ds->ops->cls_flower_stats) - return -EOPNOTSUPP; - - return ds->ops->cls_flower_stats(ds, port, cls, ingress); -} - -static int dsa_slave_setup_tc_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) -{ - switch (cls->command) { - case FLOW_CLS_REPLACE: - return dsa_slave_add_cls_flower(dev, cls, ingress); - case FLOW_CLS_DESTROY: - return dsa_slave_del_cls_flower(dev, cls, ingress); - case FLOW_CLS_STATS: - return dsa_slave_stats_cls_flower(dev, cls, ingress); - default: - return -EOPNOTSUPP; - } -} - -static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv, bool ingress) -{ - struct net_device *dev = cb_priv; - - if (!tc_can_offload(dev)) - return -EOPNOTSUPP; - - switch (type) { - case TC_SETUP_CLSMATCHALL: - return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); - case TC_SETUP_CLSFLOWER: - return dsa_slave_setup_tc_cls_flower(dev, type_data, ingress); - default: - return -EOPNOTSUPP; - } -} - -static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type, - void *type_data, void *cb_priv) -{ - return dsa_slave_setup_tc_block_cb(type, 
type_data, cb_priv, true); -} - -static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type, - void *type_data, void *cb_priv) -{ - return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false); -} - -static LIST_HEAD(dsa_slave_block_cb_list); - -static int dsa_slave_setup_tc_block(struct net_device *dev, - struct flow_block_offload *f) -{ - struct flow_block_cb *block_cb; - flow_setup_cb_t *cb; - - if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - cb = dsa_slave_setup_tc_block_cb_ig; - else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - cb = dsa_slave_setup_tc_block_cb_eg; - else - return -EOPNOTSUPP; - - f->driver_block_list = &dsa_slave_block_cb_list; - - switch (f->command) { - case FLOW_BLOCK_BIND: - if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list)) - return -EBUSY; - - block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); - if (IS_ERR(block_cb)) - return PTR_ERR(block_cb); - - flow_block_cb_add(block_cb, f); - list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list); - return 0; - case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f->block, cb, dev); - if (!block_cb) - return -ENOENT; - - flow_block_cb_remove(block_cb, f); - list_del(&block_cb->driver_list); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, - void *type_data) -{ - struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port)); - - if (!master->netdev_ops->ndo_setup_tc) - return -EOPNOTSUPP; - - return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data); -} - -static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - switch (type) { - case TC_SETUP_BLOCK: - return dsa_slave_setup_tc_block(dev, type_data); - case TC_SETUP_FT: - return dsa_slave_setup_ft_block(ds, dp->index, type_data); - default: - break; - } - - if (!ds->ops->port_setup_tc) - return -EOPNOTSUPP; - - return ds->ops->port_setup_tc(ds, dp->index, type, type_data); -} - -static int dsa_slave_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *nfc, u32 *rule_locs) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->get_rxnfc) - return -EOPNOTSUPP; - - return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); -} - -static int dsa_slave_set_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *nfc) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->set_rxnfc) - return -EOPNOTSUPP; - - return ds->ops->set_rxnfc(ds, dp->index, nfc); -} - -static int dsa_slave_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *ts) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - if (!ds->ops->get_ts_info) - return -EOPNOTSUPP; - - return ds->ops->get_ts_info(ds, p->dp->index, ts); -} - -static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, - u16 vid) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; - struct netlink_ext_ack extack = {0}; - struct dsa_switch *ds = dp->ds; - struct netdev_hw_addr *ha; - struct dsa_vlan *v; - int ret; - - /* User port... 
*/ - ret = dsa_port_vlan_add(dp, &vlan, &extack); - if (ret) { - if (extack._msg) - netdev_err(dev, "%s\n", extack._msg); - return ret; - } - - /* And CPU port... */ - ret = dsa_port_host_vlan_add(dp, &vlan, &extack); - if (ret) { - if (extack._msg) - netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index, - extack._msg); - return ret; - } - - if (!dsa_switch_supports_uc_filtering(ds) && - !dsa_switch_supports_mc_filtering(ds)) - return 0; - - v = kzalloc(sizeof(*v), GFP_KERNEL); - if (!v) { - ret = -ENOMEM; - goto rollback; - } - - netif_addr_lock_bh(dev); - - v->vid = vid; - list_add_tail(&v->list, &dp->user_vlans); - - if (dsa_switch_supports_mc_filtering(ds)) { - netdev_for_each_synced_mc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, - ha->addr, vid); - } - } - - if (dsa_switch_supports_uc_filtering(ds)) { - netdev_for_each_synced_uc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, - ha->addr, vid); - } - } - - netif_addr_unlock_bh(dev); - - dsa_flush_workqueue(); - - return 0; - -rollback: - dsa_port_host_vlan_del(dp, &vlan); - dsa_port_vlan_del(dp, &vlan); - - return ret; -} - -static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, - u16 vid) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .vid = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; - struct dsa_switch *ds = dp->ds; - struct netdev_hw_addr *ha; - struct dsa_vlan *v; - int err; - - err = dsa_port_vlan_del(dp, &vlan); - if (err) - return err; - - err = dsa_port_host_vlan_del(dp, &vlan); - if (err) - return err; - - if (!dsa_switch_supports_uc_filtering(ds) && - !dsa_switch_supports_mc_filtering(ds)) - return 0; - - netif_addr_lock_bh(dev); - - v = dsa_vlan_find(&dp->user_vlans, &vlan); - if (!v) { - netif_addr_unlock_bh(dev); - return -ENOENT; - } - - list_del(&v->list); - kfree(v); - - if (dsa_switch_supports_mc_filtering(ds)) { - netdev_for_each_synced_mc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, - ha->addr, vid); - } - } - - if (dsa_switch_supports_uc_filtering(ds)) { - netdev_for_each_synced_uc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, - ha->addr, vid); - } - } - - netif_addr_unlock_bh(dev); - - dsa_flush_workqueue(); - - return 0; -} - -static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) -{ - __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); - - return dsa_slave_vlan_rx_add_vid(arg, proto, vid); -} - -static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) -{ - __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); - - return dsa_slave_vlan_rx_kill_vid(arg, proto, vid); -} - -/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN - * filtering is enabled. The baseline is that only ports that offload a - * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware, - * but there are exceptions for quirky hardware. - * - * If ds->vlan_filtering_is_global = true, then standalone ports which share - * the same switch with other ports that offload a VLAN-aware bridge are also - * inevitably VLAN-aware. 
- * - * To summarize, a DSA switch port offloads: - * - * - If standalone (this includes software bridge, software LAG): - * - if ds->needs_standalone_vlan_filtering = true, OR if - * (ds->vlan_filtering_is_global = true AND there are bridges spanning - * this switch chip which have vlan_filtering=1) - * - the 8021q upper VLANs - * - else (standalone VLAN filtering is not needed, VLAN filtering is not - * global, or it is, but no port is under a VLAN-aware bridge): - * - no VLAN (any 8021q upper is a software VLAN) - * - * - If under a vlan_filtering=0 bridge which it offload: - * - if ds->configure_vlan_while_not_filtering = true (default): - * - the bridge VLANs. These VLANs are committed to hardware but inactive. - * - else (deprecated): - * - no VLAN. The bridge VLANs are not restored when VLAN awareness is - * enabled, so this behavior is broken and discouraged. - * - * - If under a vlan_filtering=1 bridge which it offload: - * - the bridge VLANs - * - the 8021q upper VLANs - */ -int dsa_slave_manage_vlan_filtering(struct net_device *slave, - bool vlan_filtering) -{ - int err; - - if (vlan_filtering) { - slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - - err = vlan_for_each(slave, dsa_slave_restore_vlan, slave); - if (err) { - vlan_for_each(slave, dsa_slave_clear_vlan, slave); - slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - return err; - } - } else { - err = vlan_for_each(slave, dsa_slave_clear_vlan, slave); - if (err) - return err; - - slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - } - - return 0; -} - -struct dsa_hw_port { - struct list_head list; - struct net_device *dev; - int old_mtu; -}; - -static int dsa_hw_port_list_set_mtu(struct list_head *hw_port_list, int mtu) -{ - const struct dsa_hw_port *p; - int err; - - list_for_each_entry(p, hw_port_list, list) { - if (p->dev->mtu == mtu) - continue; - - err = dev_set_mtu(p->dev, mtu); - if (err) - goto rollback; - } - - return 0; - -rollback: - list_for_each_entry_continue_reverse(p, hw_port_list, list) { - if (p->dev->mtu == p->old_mtu) - continue; - - if (dev_set_mtu(p->dev, p->old_mtu)) - netdev_err(p->dev, "Failed to restore MTU\n"); - } - - return err; -} - -static void dsa_hw_port_list_free(struct list_head *hw_port_list) -{ - struct dsa_hw_port *p, *n; - - list_for_each_entry_safe(p, n, hw_port_list, list) - kfree(p); -} - -/* Make the hardware datapath to/from @dev limited to a common MTU */ -static void dsa_bridge_mtu_normalization(struct dsa_port *dp) -{ - struct list_head hw_port_list; - struct dsa_switch_tree *dst; - int min_mtu = ETH_MAX_MTU; - struct dsa_port *other_dp; - int err; - - if (!dp->ds->mtu_enforcement_ingress) - return; - - if (!dp->bridge) - return; - - INIT_LIST_HEAD(&hw_port_list); - - /* Populate the list of ports that are part of the same bridge - * as the newly added/modified port - */ - list_for_each_entry(dst, &dsa_tree_list, list) { - list_for_each_entry(other_dp, &dst->ports, list) { - struct dsa_hw_port *hw_port; - struct net_device *slave; - - if (other_dp->type != DSA_PORT_TYPE_USER) - continue; - - if (!dsa_port_bridge_same(dp, other_dp)) - continue; - - if (!other_dp->ds->mtu_enforcement_ingress) - continue; - - slave = other_dp->slave; - - if (min_mtu > slave->mtu) - min_mtu = slave->mtu; - - hw_port = kzalloc(sizeof(*hw_port), GFP_KERNEL); - if (!hw_port) - goto out; - - hw_port->dev = slave; - hw_port->old_mtu = slave->mtu; - - list_add(&hw_port->list, &hw_port_list); - } - } - - /* Attempt to configure the entire hardware bridge to the newly added - * interface's MTU first, 
regardless of whether the intention of the - * user was to raise or lower it. - */ - err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->slave->mtu); - if (!err) - goto out; - - /* Clearly that didn't work out so well, so just set the minimum MTU on - * all hardware bridge ports now. If this fails too, then all ports will - * still have their old MTU rolled back anyway. - */ - dsa_hw_port_list_set_mtu(&hw_port_list, min_mtu); - -out: - dsa_hw_port_list_free(&hw_port_list); -} - -int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) -{ - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_port *cpu_dp = dp->cpu_dp; - struct dsa_switch *ds = dp->ds; - struct dsa_port *other_dp; - int largest_mtu = 0; - int new_master_mtu; - int old_master_mtu; - int mtu_limit; - int overhead; - int cpu_mtu; - int err; - - if (!ds->ops->port_change_mtu) - return -EOPNOTSUPP; - - dsa_tree_for_each_user_port(other_dp, ds->dst) { - int slave_mtu; - - /* During probe, this function will be called for each slave - * device, while not all of them have been allocated. That's - * ok, it doesn't change what the maximum is, so ignore it. - */ - if (!other_dp->slave) - continue; - - /* Pretend that we already applied the setting, which we - * actually haven't (still haven't done all integrity checks) - */ - if (dp == other_dp) - slave_mtu = new_mtu; - else - slave_mtu = other_dp->slave->mtu; - - if (largest_mtu < slave_mtu) - largest_mtu = slave_mtu; - } - - overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead); - old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + overhead; - if (new_master_mtu > mtu_limit) - return -ERANGE; - - /* If the master MTU isn't over limit, there's no need to check the CPU - * MTU, since that surely isn't either. - */ - cpu_mtu = largest_mtu; - - /* Start applying stuff */ - if (new_master_mtu != old_master_mtu) { - err = dev_set_mtu(master, new_master_mtu); - if (err < 0) - goto out_master_failed; - - /* We only need to propagate the MTU of the CPU port to - * upstream switches, so emit a notifier which updates them. 
- */ - err = dsa_port_mtu_change(cpu_dp, cpu_mtu); - if (err) - goto out_cpu_failed; - } - - err = ds->ops->port_change_mtu(ds, dp->index, new_mtu); - if (err) - goto out_port_failed; - - dev->mtu = new_mtu; - - dsa_bridge_mtu_normalization(dp); - - return 0; - -out_port_failed: - if (new_master_mtu != old_master_mtu) - dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead); -out_cpu_failed: - if (new_master_mtu != old_master_mtu) - dev_set_mtu(master, old_master_mtu); -out_master_failed: - return err; -} - -static int __maybe_unused -dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - unsigned long mask, new_prio; - int err, port = dp->index; - - if (!ds->ops->port_set_default_prio) - return -EOPNOTSUPP; - - err = dcb_ieee_setapp(dev, app); - if (err) - return err; - - mask = dcb_ieee_getapp_mask(dev, app); - new_prio = __fls(mask); - - err = ds->ops->port_set_default_prio(ds, port, new_prio); - if (err) { - dcb_ieee_delapp(dev, app); - return err; - } - - return 0; -} - -static int __maybe_unused -dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - unsigned long mask, new_prio; - int err, port = dp->index; - u8 dscp = app->protocol; - - if (!ds->ops->port_add_dscp_prio) - return -EOPNOTSUPP; - - if (dscp >= 64) { - netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", - dscp); - return -EINVAL; - } - - err = dcb_ieee_setapp(dev, app); - if (err) - return err; - - mask = dcb_ieee_getapp_mask(dev, app); - new_prio = __fls(mask); - - err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio); - if (err) { - dcb_ieee_delapp(dev, app); - return err; - } - - return 0; -} - -static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev, - struct dcb_app *app) -{ - switch (app->selector) { - case IEEE_8021QAZ_APP_SEL_ETHERTYPE: - switch (app->protocol) { - case 0: - return dsa_slave_dcbnl_set_default_prio(dev, app); - default: - return -EOPNOTSUPP; - } - break; - case IEEE_8021QAZ_APP_SEL_DSCP: - return dsa_slave_dcbnl_add_dscp_prio(dev, app); - default: - return -EOPNOTSUPP; - } -} - -static int __maybe_unused -dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - unsigned long mask, new_prio; - int err, port = dp->index; - - if (!ds->ops->port_set_default_prio) - return -EOPNOTSUPP; - - err = dcb_ieee_delapp(dev, app); - if (err) - return err; - - mask = dcb_ieee_getapp_mask(dev, app); - new_prio = mask ? 
__fls(mask) : 0; - - err = ds->ops->port_set_default_prio(ds, port, new_prio); - if (err) { - dcb_ieee_setapp(dev, app); - return err; - } - - return 0; -} - -static int __maybe_unused -dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int err, port = dp->index; - u8 dscp = app->protocol; - - if (!ds->ops->port_del_dscp_prio) - return -EOPNOTSUPP; - - err = dcb_ieee_delapp(dev, app); - if (err) - return err; - - err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority); - if (err) { - dcb_ieee_setapp(dev, app); - return err; - } - - return 0; -} - -static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev, - struct dcb_app *app) -{ - switch (app->selector) { - case IEEE_8021QAZ_APP_SEL_ETHERTYPE: - switch (app->protocol) { - case 0: - return dsa_slave_dcbnl_del_default_prio(dev, app); - default: - return -EOPNOTSUPP; - } - break; - case IEEE_8021QAZ_APP_SEL_DSCP: - return dsa_slave_dcbnl_del_dscp_prio(dev, app); - default: - return -EOPNOTSUPP; - } -} - -/* Pre-populate the DCB application priority table with the priorities - * configured during switch setup, which we read from hardware here. - */ -static int dsa_slave_dcbnl_init(struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - int port = dp->index; - int err; - - if (ds->ops->port_get_default_prio) { - int prio = ds->ops->port_get_default_prio(ds, port); - struct dcb_app app = { - .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, - .protocol = 0, - .priority = prio, - }; - - if (prio < 0) - return prio; - - err = dcb_ieee_setapp(dev, &app); - if (err) - return err; - } - - if (ds->ops->port_get_dscp_prio) { - int protocol; - - for (protocol = 0; protocol < 64; protocol++) { - struct dcb_app app = { - .selector = IEEE_8021QAZ_APP_SEL_DSCP, - .protocol = protocol, - }; - int prio; - - prio = ds->ops->port_get_dscp_prio(ds, port, protocol); - if (prio == -EOPNOTSUPP) - continue; - if (prio < 0) - return prio; - - app.priority = prio; - - err = dcb_ieee_setapp(dev, &app); - if (err) - return err; - } - } - - return 0; -} - -static const struct ethtool_ops dsa_slave_ethtool_ops = { - .get_drvinfo = dsa_slave_get_drvinfo, - .get_regs_len = dsa_slave_get_regs_len, - .get_regs = dsa_slave_get_regs, - .nway_reset = dsa_slave_nway_reset, - .get_link = ethtool_op_get_link, - .get_eeprom_len = dsa_slave_get_eeprom_len, - .get_eeprom = dsa_slave_get_eeprom, - .set_eeprom = dsa_slave_set_eeprom, - .get_strings = dsa_slave_get_strings, - .get_ethtool_stats = dsa_slave_get_ethtool_stats, - .get_sset_count = dsa_slave_get_sset_count, - .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, - .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, - .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, - .get_rmon_stats = dsa_slave_get_rmon_stats, - .set_wol = dsa_slave_set_wol, - .get_wol = dsa_slave_get_wol, - .set_eee = dsa_slave_set_eee, - .get_eee = dsa_slave_get_eee, - .get_link_ksettings = dsa_slave_get_link_ksettings, - .set_link_ksettings = dsa_slave_set_link_ksettings, - .get_pause_stats = dsa_slave_get_pause_stats, - .get_pauseparam = dsa_slave_get_pauseparam, - .set_pauseparam = dsa_slave_set_pauseparam, - .get_rxnfc = dsa_slave_get_rxnfc, - .set_rxnfc = dsa_slave_set_rxnfc, - .get_ts_info = dsa_slave_get_ts_info, - .self_test = dsa_slave_net_selftest, - .get_mm = dsa_slave_get_mm, - .set_mm = dsa_slave_set_mm, - .get_mm_stats = dsa_slave_get_mm_stats, -}; - -static 
const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = { - .ieee_setapp = dsa_slave_dcbnl_ieee_setapp, - .ieee_delapp = dsa_slave_dcbnl_ieee_delapp, -}; - -static void dsa_slave_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *s) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->get_stats64) - ds->ops->get_stats64(ds, dp->index, s); - else - dev_get_tstats64(dev, s); -} - -static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, - struct net_device_path *path) -{ - struct dsa_port *dp = dsa_slave_to_port(ctx->dev); - struct net_device *master = dsa_port_to_master(dp); - struct dsa_port *cpu_dp = dp->cpu_dp; - - path->dev = ctx->dev; - path->type = DEV_PATH_DSA; - path->dsa.proto = cpu_dp->tag_ops->proto; - path->dsa.port = dp->index; - ctx->dev = master; - - return 0; -} - -static const struct net_device_ops dsa_slave_netdev_ops = { - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_slave_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_dump = dsa_slave_fdb_dump, - .ndo_eth_ioctl = dsa_slave_ioctl, - .ndo_get_iflink = dsa_slave_get_iflink, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_netpoll_setup = dsa_slave_netpoll_setup, - .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, - .ndo_poll_controller = dsa_slave_poll_controller, -#endif - .ndo_setup_tc = dsa_slave_setup_tc, - .ndo_get_stats64 = dsa_slave_get_stats64, - .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, - .ndo_change_mtu = dsa_slave_change_mtu, - .ndo_fill_forward_path = dsa_slave_fill_forward_path, -}; - -static struct device_type dsa_type = { - .name = "dsa", -}; - -void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up) -{ - const struct dsa_port *dp = dsa_to_port(ds, port); - - if (dp->pl) - phylink_mac_change(dp->pl, up); -} -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change); - -static void dsa_slave_phylink_fixed_state(struct phylink_config *config, - struct phylink_link_state *state) -{ - struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); - struct dsa_switch *ds = dp->ds; - - /* No need to check that this operation is valid, the callback would - * not be called if it was not. - */ - ds->ops->phylink_fixed_state(ds, dp->index, state); -} - -/* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr, - u32 flags) -{ - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - struct dsa_switch *ds = dp->ds; - - slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); - if (!slave_dev->phydev) { - netdev_err(slave_dev, "no phy at %d\n", addr); - return -ENODEV; - } - - slave_dev->phydev->dev_flags |= flags; - - return phylink_connect_phy(dp->pl, slave_dev->phydev); -} - -static int dsa_slave_phy_setup(struct net_device *slave_dev) -{ - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - struct device_node *port_dn = dp->dn; - struct dsa_switch *ds = dp->ds; - u32 phy_flags = 0; - int ret; - - dp->pl_config.dev = &slave_dev->dev; - dp->pl_config.type = PHYLINK_NETDEV; - - /* The get_fixed_state callback takes precedence over polling the - * link GPIO in PHYLINK (see phylink_get_fixed_state). Only set - * this if the switch provides such a callback. 
- */ - if (ds->ops->phylink_fixed_state) { - dp->pl_config.get_fixed_state = dsa_slave_phylink_fixed_state; - dp->pl_config.poll_fixed_state = true; - } - - ret = dsa_port_phylink_create(dp); - if (ret) - return ret; - - if (ds->ops->get_phy_flags) - phy_flags = ds->ops->get_phy_flags(ds, dp->index); - - ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); - if (ret == -ENODEV && ds->slave_mii_bus) { - /* We could not connect to a designated PHY or SFP, so try to - * use the switch internal MDIO bus instead - */ - ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); - } - if (ret) { - netdev_err(slave_dev, "failed to connect to PHY: %pe\n", - ERR_PTR(ret)); - dsa_port_phylink_destroy(dp); - } - - return ret; -} - -void dsa_slave_setup_tagger(struct net_device *slave) -{ - struct dsa_port *dp = dsa_slave_to_port(slave); - struct net_device *master = dsa_port_to_master(dp); - struct dsa_slave_priv *p = netdev_priv(slave); - const struct dsa_port *cpu_dp = dp->cpu_dp; - const struct dsa_switch *ds = dp->ds; - - slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; - slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; - /* Try to save one extra realloc later in the TX path (in the master) - * by also inheriting the master's needed headroom and tailroom. - * The 8021q driver also does this. - */ - slave->needed_headroom += master->needed_headroom; - slave->needed_tailroom += master->needed_tailroom; - - p->xmit = cpu_dp->tag_ops->xmit; - - slave->features = master->vlan_features | NETIF_F_HW_TC; - slave->hw_features |= NETIF_F_HW_TC; - slave->features |= NETIF_F_LLTX; - if (slave->needed_tailroom) - slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); - if (ds->needs_standalone_vlan_filtering) - slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; -} - -int dsa_slave_suspend(struct net_device *slave_dev) -{ - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - - if (!netif_running(slave_dev)) - return 0; - - netif_device_detach(slave_dev); - - rtnl_lock(); - phylink_stop(dp->pl); - rtnl_unlock(); - - return 0; -} - -int dsa_slave_resume(struct net_device *slave_dev) -{ - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - - if (!netif_running(slave_dev)) - return 0; - - netif_device_attach(slave_dev); - - rtnl_lock(); - phylink_start(dp->pl); - rtnl_unlock(); - - return 0; -} - -int dsa_slave_create(struct dsa_port *port) -{ - struct net_device *master = dsa_port_to_master(port); - struct dsa_switch *ds = port->ds; - struct net_device *slave_dev; - struct dsa_slave_priv *p; - const char *name; - int assign_type; - int ret; - - if (!ds->num_tx_queues) - ds->num_tx_queues = 1; - - if (port->name) { - name = port->name; - assign_type = NET_NAME_PREDICTABLE; - } else { - name = "eth%d"; - assign_type = NET_NAME_ENUM; - } - - slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name, - assign_type, ether_setup, - ds->num_tx_queues, 1); - if (slave_dev == NULL) - return -ENOMEM; - - slave_dev->rtnl_link_ops = &dsa_link_ops; - slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; -#if IS_ENABLED(CONFIG_DCB) - slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; -#endif - if (!is_zero_ether_addr(port->mac)) - eth_hw_addr_set(slave_dev, port->mac); - else - eth_hw_addr_inherit(slave_dev, master); - slave_dev->priv_flags |= IFF_NO_QUEUE; - if (dsa_switch_supports_uc_filtering(ds)) - slave_dev->priv_flags |= IFF_UNICAST_FLT; - slave_dev->netdev_ops = &dsa_slave_netdev_ops; - if (ds->ops->port_max_mtu) - slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); - 
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); - - SET_NETDEV_DEV(slave_dev, port->ds->dev); - SET_NETDEV_DEVLINK_PORT(slave_dev, &port->devlink_port); - slave_dev->dev.of_node = port->dn; - slave_dev->vlan_features = master->vlan_features; - - p = netdev_priv(slave_dev); - slave_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!slave_dev->tstats) { - free_netdev(slave_dev); - return -ENOMEM; - } - - ret = gro_cells_init(&p->gcells, slave_dev); - if (ret) - goto out_free; - - p->dp = port; - INIT_LIST_HEAD(&p->mall_tc_list); - port->slave = slave_dev; - dsa_slave_setup_tagger(slave_dev); - - netif_carrier_off(slave_dev); - - ret = dsa_slave_phy_setup(slave_dev); - if (ret) { - netdev_err(slave_dev, - "error %d setting up PHY for tree %d, switch %d, port %d\n", - ret, ds->dst->index, ds->index, port->index); - goto out_gcells; - } - - rtnl_lock(); - - ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); - if (ret && ret != -EOPNOTSUPP) - dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", - ret, ETH_DATA_LEN, port->index); - - ret = register_netdevice(slave_dev); - if (ret) { - netdev_err(master, "error %d registering interface %s\n", - ret, slave_dev->name); - rtnl_unlock(); - goto out_phy; - } - - if (IS_ENABLED(CONFIG_DCB)) { - ret = dsa_slave_dcbnl_init(slave_dev); - if (ret) { - netdev_err(slave_dev, - "failed to initialize DCB: %pe\n", - ERR_PTR(ret)); - rtnl_unlock(); - goto out_unregister; - } - } - - ret = netdev_upper_dev_link(master, slave_dev, NULL); - - rtnl_unlock(); - - if (ret) - goto out_unregister; - - return 0; - -out_unregister: - unregister_netdev(slave_dev); -out_phy: - rtnl_lock(); - phylink_disconnect_phy(p->dp->pl); - rtnl_unlock(); - dsa_port_phylink_destroy(p->dp); -out_gcells: - gro_cells_destroy(&p->gcells); -out_free: - free_percpu(slave_dev->tstats); - free_netdev(slave_dev); - port->slave = NULL; - return ret; -} - -void dsa_slave_destroy(struct net_device *slave_dev) -{ - struct net_device *master = dsa_slave_to_master(slave_dev); - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - struct dsa_slave_priv *p = netdev_priv(slave_dev); - - netif_carrier_off(slave_dev); - rtnl_lock(); - netdev_upper_dev_unlink(master, slave_dev); - unregister_netdevice(slave_dev); - phylink_disconnect_phy(dp->pl); - rtnl_unlock(); - - dsa_port_phylink_destroy(dp); - gro_cells_destroy(&p->gcells); - free_percpu(slave_dev->tstats); - free_netdev(slave_dev); -} - -int dsa_slave_change_master(struct net_device *dev, struct net_device *master, - struct netlink_ext_ack *extack) -{ - struct net_device *old_master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - struct net_device *upper; - struct list_head *iter; - int err; - - if (master == old_master) - return 0; - - if (!ds->ops->port_change_master) { - NL_SET_ERR_MSG_MOD(extack, - "Driver does not support changing DSA master"); - return -EOPNOTSUPP; - } - - if (!netdev_uses_dsa(master)) { - NL_SET_ERR_MSG_MOD(extack, - "Interface not eligible as DSA master"); - return -EOPNOTSUPP; - } - - netdev_for_each_upper_dev_rcu(master, upper, iter) { - if (dsa_slave_dev_check(upper)) - continue; - if (netif_is_bridge_master(upper)) - continue; - NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers"); - return -EOPNOTSUPP; - } - - /* Since we allow live-changing the DSA master, plus we auto-open the - * DSA master when the user port opens => we need to ensure that the - * new DSA master is open too. 
- */ - if (dev->flags & IFF_UP) { - err = dev_open(master, extack); - if (err) - return err; - } - - netdev_upper_dev_unlink(old_master, dev); - - err = netdev_upper_dev_link(master, dev, extack); - if (err) - goto out_revert_old_master_unlink; - - err = dsa_port_change_master(dp, master, extack); - if (err) - goto out_revert_master_link; - - /* Update the MTU of the new CPU port through cross-chip notifiers */ - err = dsa_slave_change_mtu(dev, dev->mtu); - if (err && err != -EOPNOTSUPP) { - netdev_warn(dev, - "nonfatal error updating MTU with new master: %pe\n", - ERR_PTR(err)); - } - - /* If the port doesn't have its own MAC address and relies on the DSA - * master's one, inherit it again from the new DSA master. - */ - if (is_zero_ether_addr(dp->mac)) - eth_hw_addr_inherit(dev, master); - - return 0; - -out_revert_master_link: - netdev_upper_dev_unlink(master, dev); -out_revert_old_master_unlink: - netdev_upper_dev_link(old_master, dev, NULL); - return err; -} - -bool dsa_slave_dev_check(const struct net_device *dev) -{ - return dev->netdev_ops == &dsa_slave_netdev_ops; -} -EXPORT_SYMBOL_GPL(dsa_slave_dev_check); - -static int dsa_slave_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct netlink_ext_ack *extack; - int err = NOTIFY_DONE; - - if (!dsa_slave_dev_check(dev)) - return err; - - extack = netdev_notifier_info_to_extack(&info->info); - - if (netif_is_bridge_master(info->upper_dev)) { - if (info->linking) { - err = dsa_port_bridge_join(dp, info->upper_dev, extack); - if (!err) - dsa_bridge_mtu_normalization(dp); - if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_WEAK_MOD(extack, - "Offloading not supported"); - err = 0; - } - err = notifier_from_errno(err); - } else { - dsa_port_bridge_leave(dp, info->upper_dev); - err = NOTIFY_OK; - } - } else if (netif_is_lag_master(info->upper_dev)) { - if (info->linking) { - err = dsa_port_lag_join(dp, info->upper_dev, - info->upper_info, extack); - if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_WEAK_MOD(extack, - "Offloading not supported"); - err = 0; - } - err = notifier_from_errno(err); - } else { - dsa_port_lag_leave(dp, info->upper_dev); - err = NOTIFY_OK; - } - } else if (is_hsr_master(info->upper_dev)) { - if (info->linking) { - err = dsa_port_hsr_join(dp, info->upper_dev, extack); - if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_WEAK_MOD(extack, - "Offloading not supported"); - err = 0; - } - err = notifier_from_errno(err); - } else { - dsa_port_hsr_leave(dp, info->upper_dev); - err = NOTIFY_OK; - } - } - - return err; -} - -static int dsa_slave_prechangeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; - - if (netif_is_bridge_master(info->upper_dev) && !info->linking) - dsa_port_pre_bridge_leave(dp, info->upper_dev); - else if (netif_is_lag_master(info->upper_dev) && !info->linking) - dsa_port_pre_lag_leave(dp, info->upper_dev); - /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be - * meaningfully enslaved to a bridge yet - */ - - return NOTIFY_DONE; -} - -static int -dsa_slave_lag_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct net_device *lower; - struct list_head *iter; - int err = NOTIFY_DONE; - struct dsa_port *dp; - - if (!netif_is_lag_master(dev)) - return err; - - netdev_for_each_lower_dev(dev, lower, iter) { - if (!dsa_slave_dev_check(lower)) - 
continue; - - dp = dsa_slave_to_port(lower); - if (!dp->lag) - /* Software LAG */ - continue; - - err = dsa_slave_changeupper(lower, info); - if (notifier_to_errno(err)) - break; - } - - return err; -} - -/* Same as dsa_slave_lag_changeupper() except that it calls - * dsa_slave_prechangeupper() - */ -static int -dsa_slave_lag_prechangeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct net_device *lower; - struct list_head *iter; - int err = NOTIFY_DONE; - struct dsa_port *dp; - - if (!netif_is_lag_master(dev)) - return err; - - netdev_for_each_lower_dev(dev, lower, iter) { - if (!dsa_slave_dev_check(lower)) - continue; - - dp = dsa_slave_to_port(lower); - if (!dp->lag) - /* Software LAG */ - continue; - - err = dsa_slave_prechangeupper(lower, info); - if (notifier_to_errno(err)) - break; - } - - return err; -} - -static int -dsa_prevent_bridging_8021q_upper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct netlink_ext_ack *ext_ack; - struct net_device *slave, *br; - struct dsa_port *dp; - - ext_ack = netdev_notifier_info_to_extack(&info->info); - - if (!is_vlan_dev(dev)) - return NOTIFY_DONE; - - slave = vlan_dev_real_dev(dev); - if (!dsa_slave_dev_check(slave)) - return NOTIFY_DONE; - - dp = dsa_slave_to_port(slave); - br = dsa_port_bridge_dev_get(dp); - if (!br) - return NOTIFY_DONE; - - /* Deny enslaving a VLAN device into a VLAN-aware bridge */ - if (br_vlan_enabled(br) && - netif_is_bridge_master(info->upper_dev) && info->linking) { - NL_SET_ERR_MSG_MOD(ext_ack, - "Cannot enslave VLAN device into VLAN aware bridge"); - return notifier_from_errno(-EINVAL); - } - - return NOTIFY_DONE; -} - -static int -dsa_slave_check_8021q_upper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct bridge_vlan_info br_info; - struct netlink_ext_ack *extack; - int err = NOTIFY_DONE; - u16 vid; - - if (!br || !br_vlan_enabled(br)) - return NOTIFY_DONE; - - extack = netdev_notifier_info_to_extack(&info->info); - vid = vlan_dev_vlan_id(info->upper_dev); - - /* br_vlan_get_info() returns -EINVAL or -ENOENT if the - * device, respectively the VID is not found, returning - * 0 means success, which is a failure for us here. - */ - err = br_vlan_get_info(br, vid, &br_info); - if (err == 0) { - NL_SET_ERR_MSG_MOD(extack, - "This VLAN is already configured by the bridge"); - return notifier_from_errno(-EBUSY); - } - - return NOTIFY_DONE; -} - -static int -dsa_slave_prechangeupper_sanity_check(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct dsa_switch *ds; - struct dsa_port *dp; - int err; - - if (!dsa_slave_dev_check(dev)) - return dsa_prevent_bridging_8021q_upper(dev, info); - - dp = dsa_slave_to_port(dev); - ds = dp->ds; - - if (ds->ops->port_prechangeupper) { - err = ds->ops->port_prechangeupper(ds, dp->index, info); - if (err) - return notifier_from_errno(err); - } - - if (is_vlan_dev(info->upper_dev)) - return dsa_slave_check_8021q_upper(dev, info); - - return NOTIFY_DONE; -} - -/* To be eligible as a DSA master, a LAG must have all lower interfaces be - * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of - * switches in the same switch tree. 
- */ -static int dsa_lag_master_validate(struct net_device *lag_dev, - struct netlink_ext_ack *extack) -{ - struct net_device *lower1, *lower2; - struct list_head *iter1, *iter2; - - netdev_for_each_lower_dev(lag_dev, lower1, iter1) { - netdev_for_each_lower_dev(lag_dev, lower2, iter2) { - if (!netdev_uses_dsa(lower1) || - !netdev_uses_dsa(lower2)) { - NL_SET_ERR_MSG_MOD(extack, - "All LAG ports must be eligible as DSA masters"); - return notifier_from_errno(-EINVAL); - } - - if (lower1 == lower2) - continue; - - if (!dsa_port_tree_same(lower1->dsa_ptr, - lower2->dsa_ptr)) { - NL_SET_ERR_MSG_MOD(extack, - "LAG contains DSA masters of disjoint switch trees"); - return notifier_from_errno(-EINVAL); - } - } - } - - return NOTIFY_DONE; -} - -static int -dsa_master_prechangeupper_sanity_check(struct net_device *master, - struct netdev_notifier_changeupper_info *info) -{ - struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); - - if (!netdev_uses_dsa(master)) - return NOTIFY_DONE; - - if (!info->linking) - return NOTIFY_DONE; - - /* Allow DSA switch uppers */ - if (dsa_slave_dev_check(info->upper_dev)) - return NOTIFY_DONE; - - /* Allow bridge uppers of DSA masters, subject to further - * restrictions in dsa_bridge_prechangelower_sanity_check() - */ - if (netif_is_bridge_master(info->upper_dev)) - return NOTIFY_DONE; - - /* Allow LAG uppers, subject to further restrictions in - * dsa_lag_master_prechangelower_sanity_check() - */ - if (netif_is_lag_master(info->upper_dev)) - return dsa_lag_master_validate(info->upper_dev, extack); - - NL_SET_ERR_MSG_MOD(extack, - "DSA master cannot join unknown upper interfaces"); - return notifier_from_errno(-EBUSY); -} - -static int -dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); - struct net_device *lag_dev = info->upper_dev; - struct net_device *lower; - struct list_head *iter; - - if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) - return NOTIFY_DONE; - - if (!info->linking) - return NOTIFY_DONE; - - if (!netdev_uses_dsa(dev)) { - NL_SET_ERR_MSG(extack, - "Only DSA masters can join a LAG DSA master"); - return notifier_from_errno(-EINVAL); - } - - netdev_for_each_lower_dev(lag_dev, lower, iter) { - if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { - NL_SET_ERR_MSG(extack, - "Interface is DSA master for a different switch tree than this LAG"); - return notifier_from_errno(-EINVAL); - } - - break; - } - - return NOTIFY_DONE; -} - -/* Don't allow bridging of DSA masters, since the bridge layer rx_handler - * prevents the DSA fake ethertype handler to be invoked, so we don't get the - * chance to strip off and parse the DSA switch tag protocol header (the bridge - * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these - * frames). - * The only case where that would not be an issue is when bridging can already - * be offloaded, such as when the DSA master is itself a DSA or plain switchdev - * port, and is bridged only with other ports from the same hardware device. 
- */ -static int -dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, - struct netdev_notifier_changeupper_info *info) -{ - struct net_device *br = info->upper_dev; - struct netlink_ext_ack *extack; - struct net_device *lower; - struct list_head *iter; - - if (!netif_is_bridge_master(br)) - return NOTIFY_DONE; - - if (!info->linking) - return NOTIFY_DONE; - - extack = netdev_notifier_info_to_extack(&info->info); - - netdev_for_each_lower_dev(br, lower, iter) { - if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower)) - continue; - - if (!netdev_port_same_parent_id(lower, new_lower)) { - NL_SET_ERR_MSG(extack, - "Cannot do software bridging with a DSA master"); - return notifier_from_errno(-EINVAL); - } - } - - return NOTIFY_DONE; -} - -static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst, - struct net_device *lag_dev) -{ - struct net_device *new_master = dsa_tree_find_first_master(dst); - struct dsa_port *dp; - int err; - - dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp) != lag_dev) - continue; - - err = dsa_slave_change_master(dp->slave, new_master, NULL); - if (err) { - netdev_err(dp->slave, - "failed to restore master to %s: %pe\n", - new_master->name, ERR_PTR(err)); - } - } -} - -static int dsa_master_lag_join(struct net_device *master, - struct net_device *lag_dev, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack) -{ - struct dsa_port *cpu_dp = master->dsa_ptr; - struct dsa_switch_tree *dst = cpu_dp->dst; - struct dsa_port *dp; - int err; - - err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack); - if (err) - return err; - - dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp) != master) - continue; - - err = dsa_slave_change_master(dp->slave, lag_dev, extack); - if (err) - goto restore; - } - - return 0; - -restore: - dsa_tree_for_each_user_port_continue_reverse(dp, dst) { - if (dsa_port_to_master(dp) != lag_dev) - continue; - - err = dsa_slave_change_master(dp->slave, master, NULL); - if (err) { - netdev_err(dp->slave, - "failed to restore master to %s: %pe\n", - master->name, ERR_PTR(err)); - } - } - - dsa_master_lag_teardown(lag_dev, master->dsa_ptr); - - return err; -} - -static void dsa_master_lag_leave(struct net_device *master, - struct net_device *lag_dev) -{ - struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; - struct dsa_switch_tree *dst = cpu_dp->dst; - struct dsa_port *new_cpu_dp = NULL; - struct net_device *lower; - struct list_head *iter; - - netdev_for_each_lower_dev(lag_dev, lower, iter) { - if (netdev_uses_dsa(lower)) { - new_cpu_dp = lower->dsa_ptr; - break; - } - } - - if (new_cpu_dp) { - /* Update the CPU port of the user ports still under the LAG - * so that dsa_port_to_master() continues to work properly - */ - dsa_tree_for_each_user_port(dp, dst) - if (dsa_port_to_master(dp) == lag_dev) - dp->cpu_dp = new_cpu_dp; - - /* Update the index of the virtual CPU port to match the lowest - * physical CPU port - */ - lag_dev->dsa_ptr = new_cpu_dp; - wmb(); - } else { - /* If the LAG DSA master has no ports left, migrate back all - * user ports to the first physical CPU port - */ - dsa_tree_migrate_ports_from_lag_master(dst, lag_dev); - } - - /* This DSA master has left its LAG in any case, so let - * the CPU port leave the hardware LAG as well - */ - dsa_master_lag_teardown(lag_dev, master->dsa_ptr); -} - -static int dsa_master_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) -{ - struct netlink_ext_ack *extack; - int err = 
NOTIFY_DONE; - - if (!netdev_uses_dsa(dev)) - return err; - - extack = netdev_notifier_info_to_extack(&info->info); - - if (netif_is_lag_master(info->upper_dev)) { - if (info->linking) { - err = dsa_master_lag_join(dev, info->upper_dev, - info->upper_info, extack); - err = notifier_from_errno(err); - } else { - dsa_master_lag_leave(dev, info->upper_dev); - err = NOTIFY_OK; - } - } - - return err; -} - -static int dsa_slave_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - switch (event) { - case NETDEV_PRECHANGEUPPER: { - struct netdev_notifier_changeupper_info *info = ptr; - int err; - - err = dsa_slave_prechangeupper_sanity_check(dev, info); - if (notifier_to_errno(err)) - return err; - - err = dsa_master_prechangeupper_sanity_check(dev, info); - if (notifier_to_errno(err)) - return err; - - err = dsa_lag_master_prechangelower_sanity_check(dev, info); - if (notifier_to_errno(err)) - return err; - - err = dsa_bridge_prechangelower_sanity_check(dev, info); - if (notifier_to_errno(err)) - return err; - - err = dsa_slave_prechangeupper(dev, ptr); - if (notifier_to_errno(err)) - return err; - - err = dsa_slave_lag_prechangeupper(dev, ptr); - if (notifier_to_errno(err)) - return err; - - break; - } - case NETDEV_CHANGEUPPER: { - int err; - - err = dsa_slave_changeupper(dev, ptr); - if (notifier_to_errno(err)) - return err; - - err = dsa_slave_lag_changeupper(dev, ptr); - if (notifier_to_errno(err)) - return err; - - err = dsa_master_changeupper(dev, ptr); - if (notifier_to_errno(err)) - return err; - - break; - } - case NETDEV_CHANGELOWERSTATE: { - struct netdev_notifier_changelowerstate_info *info = ptr; - struct dsa_port *dp; - int err = 0; - - if (dsa_slave_dev_check(dev)) { - dp = dsa_slave_to_port(dev); - - err = dsa_port_lag_change(dp, info->lower_state_info); - } - - /* Mirror LAG port events on DSA masters that are in - * a LAG towards their respective switch CPU ports - */ - if (netdev_uses_dsa(dev)) { - dp = dev->dsa_ptr; - - err = dsa_port_lag_change(dp, info->lower_state_info); - } - - return notifier_from_errno(err); - } - case NETDEV_CHANGE: - case NETDEV_UP: { - /* Track state of master port. - * DSA driver may require the master port (and indirectly - * the tagger) to be available for some special operation. - */ - if (netdev_uses_dsa(dev)) { - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch_tree *dst = cpu_dp->ds->dst; - - /* Track when the master port is UP */ - dsa_tree_master_oper_state_change(dst, dev, - netif_oper_up(dev)); - - /* Track when the master port is ready and can accept - * packet. - * NETDEV_UP event is not enough to flag a port as ready. - * We also have to wait for linkwatch_do_dev to dev_activate - * and emit a NETDEV_CHANGE event. - * We check if a master port is ready by checking if the dev - * have a qdisc assigned and is not noop. 
- */ - dsa_tree_master_admin_state_change(dst, dev, - !qdisc_tx_is_noop(dev)); - - return NOTIFY_OK; - } - - return NOTIFY_DONE; - } - case NETDEV_GOING_DOWN: { - struct dsa_port *dp, *cpu_dp; - struct dsa_switch_tree *dst; - LIST_HEAD(close_list); - - if (!netdev_uses_dsa(dev)) - return NOTIFY_DONE; - - cpu_dp = dev->dsa_ptr; - dst = cpu_dp->ds->dst; - - dsa_tree_master_admin_state_change(dst, dev, false); - - list_for_each_entry(dp, &dst->ports, list) { - if (!dsa_port_is_user(dp)) - continue; - - if (dp->cpu_dp != cpu_dp) - continue; - - list_add(&dp->slave->close_list, &close_list); - } - - dev_close_many(&close_list, true); - - return NOTIFY_OK; - } - default: - break; - } - - return NOTIFY_DONE; -} - -static void -dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) -{ - struct switchdev_notifier_fdb_info info = {}; - - info.addr = switchdev_work->addr; - info.vid = switchdev_work->vid; - info.offloaded = true; - call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, - switchdev_work->orig_dev, &info.info, NULL); -} - -static void dsa_slave_switchdev_event_work(struct work_struct *work) -{ - struct dsa_switchdev_event_work *switchdev_work = - container_of(work, struct dsa_switchdev_event_work, work); - const unsigned char *addr = switchdev_work->addr; - struct net_device *dev = switchdev_work->dev; - u16 vid = switchdev_work->vid; - struct dsa_switch *ds; - struct dsa_port *dp; - int err; - - dp = dsa_slave_to_port(dev); - ds = dp->ds; - - switch (switchdev_work->event) { - case SWITCHDEV_FDB_ADD_TO_DEVICE: - if (switchdev_work->host_addr) - err = dsa_port_bridge_host_fdb_add(dp, addr, vid); - else if (dp->lag) - err = dsa_port_lag_fdb_add(dp, addr, vid); - else - err = dsa_port_fdb_add(dp, addr, vid); - if (err) { - dev_err(ds->dev, - "port %d failed to add %pM vid %d to fdb: %d\n", - dp->index, addr, vid, err); - break; - } - dsa_fdb_offload_notify(switchdev_work); - break; - - case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (switchdev_work->host_addr) - err = dsa_port_bridge_host_fdb_del(dp, addr, vid); - else if (dp->lag) - err = dsa_port_lag_fdb_del(dp, addr, vid); - else - err = dsa_port_fdb_del(dp, addr, vid); - if (err) { - dev_err(ds->dev, - "port %d failed to delete %pM vid %d from fdb: %d\n", - dp->index, addr, vid, err); - } - - break; - } - - kfree(switchdev_work); -} - -static bool dsa_foreign_dev_check(const struct net_device *dev, - const struct net_device *foreign_dev) -{ - const struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch_tree *dst = dp->ds->dst; - - if (netif_is_bridge_master(foreign_dev)) - return !dsa_tree_offloads_bridge_dev(dst, foreign_dev); - - if (netif_is_bridge_port(foreign_dev)) - return !dsa_tree_offloads_bridge_port(dst, foreign_dev); - - /* Everything else is foreign */ - return true; -} - -static int dsa_slave_fdb_event(struct net_device *dev, - struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info) -{ - struct dsa_switchdev_event_work *switchdev_work; - struct dsa_port *dp = dsa_slave_to_port(dev); - bool host_addr = fdb_info->is_local; - struct dsa_switch *ds = dp->ds; - - if (ctx && ctx != dp) - return 0; - - if (!dp->bridge) - return 0; - - if (switchdev_fdb_is_dynamically_learned(fdb_info)) { - if (dsa_port_offloads_bridge_port(dp, orig_dev)) - return 0; - - /* FDB entries learned by the software bridge or by foreign - * bridge ports should be installed as host addresses only if - * the driver requests assisted learning. 
- */ - if (!ds->assisted_learning_on_cpu_port) - return 0; - } - - /* Also treat FDB entries on foreign interfaces bridged with us as host - * addresses. - */ - if (dsa_foreign_dev_check(dev, orig_dev)) - host_addr = true; - - /* Check early that we're not doing work in vain. - * Host addresses on LAG ports still require regular FDB ops, - * since the CPU port isn't in a LAG. - */ - if (dp->lag && !host_addr) { - if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del) - return -EOPNOTSUPP; - } else { - if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) - return -EOPNOTSUPP; - } - - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return -ENOMEM; - - netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", - event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", - orig_dev->name, fdb_info->addr, fdb_info->vid, - host_addr ? " as host address" : ""); - - INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); - switchdev_work->event = event; - switchdev_work->dev = dev; - switchdev_work->orig_dev = orig_dev; - - ether_addr_copy(switchdev_work->addr, fdb_info->addr); - switchdev_work->vid = fdb_info->vid; - switchdev_work->host_addr = host_addr; - - dsa_schedule_work(&switchdev_work->work); - - return 0; -} - -/* Called under rcu_read_lock() */ -static int dsa_slave_switchdev_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = switchdev_notifier_info_to_dev(ptr); - int err; - - switch (event) { - case SWITCHDEV_PORT_ATTR_SET: - err = switchdev_handle_port_attr_set(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_attr_set); - return notifier_from_errno(err); - case SWITCHDEV_FDB_ADD_TO_DEVICE: - case SWITCHDEV_FDB_DEL_TO_DEVICE: - err = switchdev_handle_fdb_event_to_device(dev, event, ptr, - dsa_slave_dev_check, - dsa_foreign_dev_check, - dsa_slave_fdb_event); - return notifier_from_errno(err); - default: - return NOTIFY_DONE; - } - - return NOTIFY_OK; -} - -static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = switchdev_notifier_info_to_dev(ptr); - int err; - - switch (event) { - case SWITCHDEV_PORT_OBJ_ADD: - err = switchdev_handle_port_obj_add_foreign(dev, ptr, - dsa_slave_dev_check, - dsa_foreign_dev_check, - dsa_slave_port_obj_add); - return notifier_from_errno(err); - case SWITCHDEV_PORT_OBJ_DEL: - err = switchdev_handle_port_obj_del_foreign(dev, ptr, - dsa_slave_dev_check, - dsa_foreign_dev_check, - dsa_slave_port_obj_del); - return notifier_from_errno(err); - case SWITCHDEV_PORT_ATTR_SET: - err = switchdev_handle_port_attr_set(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_attr_set); - return notifier_from_errno(err); - } - - return NOTIFY_DONE; -} - -static struct notifier_block dsa_slave_nb __read_mostly = { - .notifier_call = dsa_slave_netdevice_event, -}; - -struct notifier_block dsa_slave_switchdev_notifier = { - .notifier_call = dsa_slave_switchdev_event, -}; - -struct notifier_block dsa_slave_switchdev_blocking_notifier = { - .notifier_call = dsa_slave_switchdev_blocking_event, -}; - -int dsa_slave_register_notifier(void) -{ - struct notifier_block *nb; - int err; - - err = register_netdevice_notifier(&dsa_slave_nb); - if (err) - return err; - - err = register_switchdev_notifier(&dsa_slave_switchdev_notifier); - if (err) - goto err_switchdev_nb; - - nb = &dsa_slave_switchdev_blocking_notifier; - err = register_switchdev_blocking_notifier(nb); - if (err) - goto 
err_switchdev_blocking_nb; - - return 0; - -err_switchdev_blocking_nb: - unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); -err_switchdev_nb: - unregister_netdevice_notifier(&dsa_slave_nb); - return err; -} - -void dsa_slave_unregister_notifier(void) -{ - struct notifier_block *nb; - int err; - - nb = &dsa_slave_switchdev_blocking_notifier; - err = unregister_switchdev_blocking_notifier(nb); - if (err) - pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err); - - err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); - if (err) - pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); - - err = unregister_netdevice_notifier(&dsa_slave_nb); - if (err) - pr_err("DSA: failed to unregister slave notifier (%d)\n", err); -} diff --git a/net/dsa/slave.h b/net/dsa/slave.h deleted file mode 100644 index d0abe609e00d..000000000000 --- a/net/dsa/slave.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef __DSA_SLAVE_H -#define __DSA_SLAVE_H - -#include -#include -#include -#include -#include -#include -#include - -struct net_device; -struct netlink_ext_ack; - -extern struct notifier_block dsa_slave_switchdev_notifier; -extern struct notifier_block dsa_slave_switchdev_blocking_notifier; - -struct dsa_slave_priv { - /* Copy of CPU port xmit for faster access in slave transmit hot path */ - struct sk_buff * (*xmit)(struct sk_buff *skb, - struct net_device *dev); - - struct gro_cells gcells; - - /* DSA port data, such as switch, port index, etc. */ - struct dsa_port *dp; - -#ifdef CONFIG_NET_POLL_CONTROLLER - struct netpoll *netpoll; -#endif - - /* TC context */ - struct list_head mall_tc_list; -}; - -void dsa_slave_mii_bus_init(struct dsa_switch *ds); -int dsa_slave_create(struct dsa_port *dp); -void dsa_slave_destroy(struct net_device *slave_dev); -int dsa_slave_suspend(struct net_device *slave_dev); -int dsa_slave_resume(struct net_device *slave_dev); -int dsa_slave_register_notifier(void); -void dsa_slave_unregister_notifier(void); -void dsa_slave_sync_ha(struct net_device *dev); -void dsa_slave_unsync_ha(struct net_device *dev); -void dsa_slave_setup_tagger(struct net_device *slave); -int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); -int dsa_slave_change_master(struct net_device *dev, struct net_device *master, - struct netlink_ext_ack *extack); -int dsa_slave_manage_vlan_filtering(struct net_device *dev, - bool vlan_filtering); - -static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - return p->dp; -} - -static inline struct net_device * -dsa_slave_to_master(const struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_to_master(dp); -} - -#endif diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 1a42f9317334..3d2feeea897b 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -15,10 +15,10 @@ #include "dsa.h" #include "netlink.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag_8021q.h" #include "trace.h" +#include "user.h" static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) @@ -894,12 +894,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, * bits that depend on the tagger, such as the MTU. 
*/ dsa_switch_for_each_user_port(dp, ds) { - struct net_device *slave = dp->slave; + struct net_device *user = dp->user; - dsa_slave_setup_tagger(slave); + dsa_user_setup_tagger(user); /* rtnl_mutex is held in dsa_tree_change_tag_proto */ - dsa_slave_change_mtu(slave, slave->mtu); + dsa_user_change_mtu(user, user->mtu); } return 0; @@ -960,13 +960,13 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds, } static int -dsa_switch_master_state_change(struct dsa_switch *ds, - struct dsa_notifier_master_state_info *info) +dsa_switch_conduit_state_change(struct dsa_switch *ds, + struct dsa_notifier_conduit_state_info *info) { - if (!ds->ops->master_state_change) + if (!ds->ops->conduit_state_change) return 0; - ds->ops->master_state_change(ds, info->master, info->operational); + ds->ops->conduit_state_change(ds, info->conduit, info->operational); return 0; } @@ -1056,8 +1056,8 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: err = dsa_switch_tag_8021q_vlan_del(ds, info); break; - case DSA_NOTIFIER_MASTER_STATE_CHANGE: - err = dsa_switch_master_state_change(ds, info); + case DSA_NOTIFIER_CONDUIT_STATE_CHANGE: + err = dsa_switch_conduit_state_change(ds, info); break; default: err = -EOPNOTSUPP; diff --git a/net/dsa/switch.h b/net/dsa/switch.h index ea034677da15..be0a2749cd97 100644 --- a/net/dsa/switch.h +++ b/net/dsa/switch.h @@ -34,7 +34,7 @@ enum { DSA_NOTIFIER_TAG_PROTO_DISCONNECT, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, - DSA_NOTIFIER_MASTER_STATE_CHANGE, + DSA_NOTIFIER_CONDUIT_STATE_CHANGE, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -105,9 +105,9 @@ struct dsa_notifier_tag_8021q_vlan_info { u16 vid; }; -/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ -struct dsa_notifier_master_state_info { - const struct net_device *master; +/* DSA_NOTIFIER_CONDUIT_STATE_CHANGE */ +struct dsa_notifier_conduit_state_info { + const struct net_device *conduit; bool operational; }; diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 5105a5ff58fa..6e402d49afd3 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -13,8 +13,8 @@ #include #include -#include "slave.h" #include "tag.h" +#include "user.h" static LIST_HEAD(dsa_tag_drivers_list); static DEFINE_MUTEX(dsa_tag_drivers_lock); @@ -27,7 +27,7 @@ static DEFINE_MUTEX(dsa_tag_drivers_lock); * switch, the DSA driver owning the interface to which the packet is * delivered is never notified unless we do so here. 
*/ -static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, +static bool dsa_skb_defer_rx_timestamp(struct dsa_user_priv *p, struct sk_buff *skb) { struct dsa_switch *ds = p->dp->ds; @@ -57,7 +57,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; - struct dsa_slave_priv *p; + struct dsa_user_priv *p; if (unlikely(!cpu_dp)) { kfree_skb(skb); @@ -75,7 +75,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb_has_extensions(skb)) skb->slow_gro = 0; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (likely(skb->dev)) { dsa_default_offload_fwd_mark(skb); nskb = skb; @@ -94,7 +94,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - if (unlikely(!dsa_slave_dev_check(skb->dev))) { + if (unlikely(!dsa_user_dev_check(skb->dev))) { /* Packet is to be injected directly on an upper * device, e.g. a team/bond, so skip all DSA-port * specific actions. diff --git a/net/dsa/tag.h b/net/dsa/tag.h index 32d12f4a9d73..f6b9c73718df 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -9,7 +9,7 @@ #include #include "port.h" -#include "slave.h" +#include "user.h" struct dsa_tag_driver { const struct dsa_device_ops *ops; @@ -29,7 +29,7 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) return ops->needed_headroom + ops->needed_tailroom; } -static inline struct net_device *dsa_master_find_slave(struct net_device *dev, +static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, int device, int port) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -39,7 +39,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, list_for_each_entry(dp, &dst->ports, list) if (dp->ds->index == device && dp->index == port && dp->type == DSA_PORT_TYPE_USER) - return dp->slave; + return dp->user; return NULL; } @@ -49,7 +49,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, */ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct net_device *br = dsa_port_bridge_dev_get(dp); struct net_device *dev = skb->dev; struct net_device *upper_dev; @@ -107,12 +107,12 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * to support termination through the bridge. 
*/ static inline struct net_device * -dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) +dsa_find_designated_bridge_port_by_vid(struct net_device *conduit, u16 vid) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct bridge_vlan_info vinfo; - struct net_device *slave; + struct net_device *user; struct dsa_port *dp; int err; @@ -134,13 +134,13 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) if (dp->cpu_dp != cpu_dp) continue; - slave = dp->slave; + user = dp->user; - err = br_vlan_get_info_rcu(slave, vid, &vinfo); + err = br_vlan_get_info_rcu(user, vid, &vinfo); if (err) continue; - return slave; + return user; } return NULL; @@ -155,7 +155,7 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) */ static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); skb->offload_fwd_mark = !!(dp->bridge); } @@ -215,9 +215,9 @@ static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len) memmove(skb->data, skb->data + len, 2 * ETH_ALEN); } -/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from +/* On RX, eth_type_trans() on the DSA conduit pulls ETH_HLEN bytes starting from * skb_mac_header(skb), which leaves skb->data pointing at the first byte after - * what the DSA master perceives as the EtherType (the beginning of the L3 + * what the DSA conduit perceives as the EtherType (the beginning of the L3 * protocol). Since DSA EtherType header taggers treat the EtherType as part of * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header * is located 2 bytes behind skb->data. Note that EtherType in this context diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index cbdfc392f7e0..71b26ae6db39 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -73,7 +73,7 @@ struct dsa_tag_8021q_vlan { struct dsa_8021q_context { struct dsa_switch *ds; struct list_head vlans; - /* EtherType of RX VID, used for filtering on master interface */ + /* EtherType of RX VID, used for filtering on conduit interface */ __be16 proto; }; @@ -338,7 +338,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); u16 vid = dsa_tag_8021q_standalone_vid(dp); - struct net_device *master; + struct net_device *conduit; int err; /* The CPU port is implicitly configured by @@ -347,7 +347,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return 0; - master = dsa_port_to_master(dp); + conduit = dsa_port_to_conduit(dp); err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { @@ -357,8 +357,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) return err; } - /* Add the VLAN to the master's RX filter. */ - vlan_vid_add(master, ctx->proto, vid); + /* Add the VLAN to the conduit's RX filter. 
*/ + vlan_vid_add(conduit, ctx->proto, vid); return err; } @@ -368,7 +368,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); u16 vid = dsa_tag_8021q_standalone_vid(dp); - struct net_device *master; + struct net_device *conduit; /* The CPU port is implicitly configured by * configuring the front-panel ports @@ -376,11 +376,11 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - master = dsa_port_to_master(dp); + conduit = dsa_port_to_conduit(dp); dsa_port_tag_8021q_vlan_del(dp, vid, false); - vlan_vid_del(master, ctx->proto, vid); + vlan_vid_del(conduit, ctx->proto, vid); } static int dsa_tag_8021q_setup(struct dsa_switch *ds) @@ -468,10 +468,10 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *dp; @@ -490,7 +490,7 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, continue; if (dsa_port_bridge_num_get(dp) == vbid) - return dp->slave; + return dp->user; } return NULL; diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h index b75cbaa028ef..41f7167ac520 100644 --- a/net/dsa/tag_8021q.h +++ b/net/dsa/tag_8021q.h @@ -16,7 +16,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, int *vbid); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid); int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 7f3b7d730b85..92ce67b93a58 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -29,7 +29,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __le16 *phdr; u16 hdr; @@ -74,7 +74,7 @@ static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, /* Get source port information */ port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr); - skb->dev = dsa_master_find_slave(ndev, 0, port); + skb->dev = dsa_conduit_find_user(ndev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index cacdafb41200..83d283a5d27e 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -85,7 +85,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, struct net_device *dev, unsigned int offset) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; @@ -96,7 +96,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. 
* - * Let dsa_slave_xmit() free the SKB + * Let dsa_user_xmit() free the SKB */ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false)) return NULL; @@ -119,7 +119,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK; - /* Now tell the master network device about the desired output queue + /* Now tell the conduit network device about the desired output queue * as well */ skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue)); @@ -164,7 +164,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; @@ -216,7 +216,7 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM, BRCM_NAME); static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *brcm_tag; /* The Ethernet switch we are interfaced with needs packets to be at @@ -226,7 +226,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. * - * Let dsa_slave_xmit() free the SKB + * Let dsa_user_xmit() free the SKB */ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false)) return NULL; @@ -264,7 +264,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 1fd7fa26db64..8ed52dd663ab 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -129,7 +129,7 @@ enum dsa_code { static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct net_device *br_dev; u8 tag_dev, tag_port; enum dsa_cmd cmd; @@ -267,14 +267,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1); skb->dev = lag ? lag->dev : NULL; } else { - skb->dev = dsa_master_find_slave(dev, source_device, + skb->dev = dsa_conduit_find_user(dev, source_device, source_port); } if (!skb->dev) return NULL; - /* When using LAG offload, skb->dev is not a DSA slave interface, + /* When using LAG offload, skb->dev is not a DSA user interface, * so we cannot call dsa_default_offload_fwd_mark and we need to * special-case it. 
*/ diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index e279cd9057b0..3539141b5350 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -61,7 +61,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *gswip_tag; skb_push(skb, GSWIP_TX_HEADER_LEN); @@ -89,7 +89,7 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, /* Get source port information */ port = (gswip_tag[7] & GSWIP_RX_SPPID_MASK) >> GSWIP_RX_SPPID_SHIFT; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 03a1fb9c87a9..6e233cd0aa38 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -20,7 +20,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *tag; /* Calculate checksums (if required) before adding the trailer tag to @@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; unsigned int port = tag[0] & 0x03; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { netdev_warn_once(dev, "Failed to get source port: %d\n", port); return NULL; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 3632e47dea9e..9be341fa88f0 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -87,7 +87,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int port, unsigned int len) { - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; @@ -119,7 +119,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; u8 *tag; @@ -256,7 +256,7 @@ static struct sk_buff *ksz_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) return NULL; kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. 
*/ xmit_work->dp = dp; @@ -272,7 +272,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; __be16 *tag; u16 val; @@ -344,7 +344,7 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; u8 *tag; @@ -410,7 +410,7 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); const struct ethhdr *hdr = eth_hdr(skb); __be16 *tag; u16 val; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index c25f5536706b..1ed8ee24855d 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -56,7 +56,7 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *lan9303_tag; u16 tag; @@ -99,7 +99,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) source_port = lan9303_tag1 & 0x3; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 40af80452747..2483785f6ab1 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -23,7 +23,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 xmit_tpid; u8 *mtk_tag; @@ -85,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c index d2fd179c4227..9a473624db50 100644 --- a/net/dsa/tag_none.c +++ b/net/dsa/tag_none.c @@ -12,8 +12,8 @@ #define NONE_NAME "none" -static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, - struct net_device *dev) +static struct sk_buff *dsa_user_notag_xmit(struct sk_buff *skb, + struct net_device *dev) { /* Just return the original SKB */ return skb; @@ -22,7 +22,7 @@ static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, static const struct dsa_device_ops none_ops = { .name = NONE_NAME, .proto = DSA_TAG_PROTO_NONE, - .xmit = dsa_slave_notag_xmit, + .xmit = dsa_user_notag_xmit, }; module_dsa_tag_driver(none_ops); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 20bf7074d5a6..ef2f8fffb2c7 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -45,7 +45,7 @@ static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + 
struct dsa_port *dp = dsa_user_to_port(netdev); struct dsa_switch *ds = dp->ds; u64 vlan_tci, tag_type; void *injection; @@ -79,7 +79,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); void *injection; ocelot_xmit_common(skb, netdev, cpu_to_be32(0x8880000a), &injection); @@ -91,7 +91,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, static struct sk_buff *seville_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); void *injection; ocelot_xmit_common(skb, netdev, cpu_to_be32(0x88800005), &injection); @@ -111,12 +111,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, u16 vlan_tpid; u64 rew_val; - /* Revert skb->data by the amount consumed by the DSA master, + /* Revert skb->data by the amount consumed by the DSA conduit, * so it points to the beginning of the frame. */ skb_push(skb, ETH_HLEN); /* We don't care about the short prefix, it is just for easy entrance - * into the DSA master's RX filter. Discard it now by moving it into + * into the DSA conduit's RX filter. Discard it now by moving it into * the headroom. */ skb_pull(skb, OCELOT_SHORT_PREFIX_LEN); @@ -141,12 +141,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, ocelot_xfh_get_vlan_tci(extraction, &vlan_tci); ocelot_xfh_get_rew_val(extraction, &rew_val); - skb->dev = dsa_master_find_slave(netdev, 0, src_port); + skb->dev = dsa_conduit_find_user(netdev, 0, src_port); if (!skb->dev) /* The switch will reflect back some frames sent through - * sockets opened on the bare DSA master. These will come back + * sockets opened on the bare DSA conduit. These will come back * with src_port equal to the index of the CPU port, for which - * there is no slave registered. So don't print any error + * there is no user registered. So don't print any error * message here (ignore and drop those frames). */ return NULL; @@ -170,7 +170,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, * equal to the pvid of the ingress port and should not be used for * processing. */ - dp = dsa_slave_to_port(skb->dev); + dp = dsa_user_to_port(skb->dev); vlan_tpid = tag_type ? ETH_P_8021AD : ETH_P_8021Q; if (dsa_port_is_vlan_filtering(dp) && @@ -192,7 +192,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = { .xmit = ocelot_xmit, .rcv = ocelot_rcv, .needed_headroom = OCELOT_TOTAL_TAG_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(ocelot_netdev_ops); @@ -204,7 +204,7 @@ static const struct dsa_device_ops seville_netdev_ops = { .xmit = seville_xmit, .rcv = ocelot_rcv, .needed_headroom = OCELOT_TOTAL_TAG_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(seville_netdev_ops); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 1f0b8c20eba5..210039320888 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -37,8 +37,8 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, return NULL; /* PTP over IP packets need UDP checksumming. We may have inherited - * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent - * through the DSA master, so calculate the checksum here. 
+ * NETIF_F_HW_CSUM from the DSA conduit, but these packets are not sent + * through the DSA conduit, so calculate the checksum here. */ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) return NULL; @@ -49,7 +49,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, /* Calls felix_port_deferred_xmit in felix.c */ kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. */ xmit_work->dp = dp; @@ -63,7 +63,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -83,7 +83,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); - skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port); if (!skb->dev) return NULL; @@ -130,7 +130,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = { .connect = ocelot_connect, .disconnect = ocelot_disconnect, .needed_headroom = VLAN_HLEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index e5ff7c34e577..6514aa7993ce 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -14,7 +14,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *phdr; u16 hdr; @@ -78,7 +78,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) /* Get source port information */ port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; @@ -116,7 +116,7 @@ static const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, .needed_headroom = QCA_HDR_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index c327314b95e3..4da5bad1a7aa 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -36,7 +36,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *p; u8 *tag; u16 out; @@ -97,9 +97,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, } port = protport & 0xff; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { - netdev_dbg(dev, "could not find slave for port %d\n", port); + netdev_dbg(dev, "could not find user for port %d\n", port); return NULL; } diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index 4f67834fd121..07e857debabf 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -103,7 +103,7 @@ static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, void *tag) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 tag16[RTL8_4_TAG_LEN / 2]; /* Set 
Realtek EtherType */ @@ -180,10 +180,10 @@ static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev, /* Parse TX (switch->CPU) */ port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3])); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { dev_warn_ratelimited(&dev->dev, - "could not find slave for port %d\n", + "could not find user for port %d\n", port); return -ENOENT; } diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c index 437a6820ac42..2ce866b45615 100644 --- a/net/dsa/tag_rzn1_a5psw.c +++ b/net/dsa/tag_rzn1_a5psw.c @@ -39,7 +39,7 @@ struct a5psw_tag { static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct a5psw_tag *ptag; u32 data2_val; @@ -90,7 +90,7 @@ static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb, port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data)); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index ade3eeb2f3e6..1fffe8c2b589 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -157,7 +157,7 @@ static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, return NULL; kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. */ xmit_work->dp = dp; @@ -210,7 +210,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp) static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); unsigned int bridge_num = dsa_port_bridge_num_get(dp); struct net_device *br = dsa_port_bridge_dev_get(dp); u16 tx_vid; @@ -235,7 +235,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, /* Transform untagged control packets into pvid-tagged control packets so that * all packets sent by this tagger are VLAN-tagged and we can configure the - * switch to drop untagged packets coming from the DSA master. + * switch to drop untagged packets coming from the DSA conduit. */ static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, struct sk_buff *skb, u8 pcp) @@ -266,7 +266,7 @@ static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -294,7 +294,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, struct net_device *netdev) { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -383,7 +383,7 @@ static struct sk_buff * Buffer it until we get its meta frame. 
*/ if (is_link_local) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct sja1105_tagger_private *priv; struct dsa_switch *ds = dp->ds; @@ -396,7 +396,7 @@ static struct sk_buff if (priv->stampable_skb) { dev_err_ratelimited(ds->dev, "Expected meta frame, is %12llx " - "in the DSA master multicast filter?\n", + "in the DSA conduit multicast filter?\n", SJA1105_META_DMAC); kfree_skb(priv->stampable_skb); } @@ -417,7 +417,7 @@ static struct sk_buff * frame, which serves no further purpose). */ } else if (is_meta) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct sja1105_tagger_private *priv; struct dsa_switch *ds = dp->ds; struct sk_buff *stampable_skb; @@ -550,7 +550,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, } if (source_port != -1 && switch_id != -1) - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); else if (vbid >= 1) skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); else @@ -573,16 +573,16 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); int n_ts = SJA1110_RX_HEADER_N_TS(rx_header); struct sja1105_tagger_data *tagger_data; - struct net_device *master = skb->dev; + struct net_device *conduit = skb->dev; struct dsa_port *cpu_dp; struct dsa_switch *ds; int i; - cpu_dp = master->dsa_ptr; + cpu_dp = conduit->dsa_ptr; ds = dsa_switch_find(cpu_dp->dst->index, switch_id); if (!ds) { net_err_ratelimited("%s: cannot find switch id %d\n", - master->name, switch_id); + conduit->name, switch_id); return NULL; } @@ -649,7 +649,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, /* skb->len counts from skb->data, while start_of_padding * counts from the destination MAC address. Right now skb->data - * is still as set by the DSA master, so to trim away the + * is still as set by the DSA conduit, so to trim away the * padding and trailer we need to account for the fact that * skb->data points to skb_mac_header(skb) + ETH_HLEN. 
*/ @@ -698,7 +698,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, else if (source_port == -1 || switch_id == -1) skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); else - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; @@ -778,7 +778,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = { .disconnect = sja1105_disconnect, .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(sja1105_netdev_ops); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 7361b9106382..1ebb25a8b140 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *trailer; trailer = skb_put(skb, 4); @@ -41,7 +41,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) source_port = trailer[1] & 7; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index af19969f9bc4..c9c163598ef2 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -13,7 +13,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *partner, *dp = dsa_slave_to_port(dev); + struct dsa_port *partner, *dp = dsa_user_to_port(dev); u8 *trailer; trailer = skb_put(skb, 1); @@ -39,7 +39,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) if (source_port < 0) return NULL; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/user.c b/net/dsa/user.c new file mode 100644 index 000000000000..d438884a4eb0 --- /dev/null +++ b/net/dsa/user.c @@ -0,0 +1,3727 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * net/dsa/user.c - user device handling + * Copyright (c) 2008-2009 Marvell Semiconductor + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "conduit.h" +#include "dsa.h" +#include "netlink.h" +#include "port.h" +#include "switch.h" +#include "tag.h" +#include "user.h" + +struct dsa_switchdev_event_work { + struct net_device *dev; + struct net_device *orig_dev; + struct work_struct work; + unsigned long event; + /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and + * SWITCHDEV_FDB_DEL_TO_DEVICE + */ + unsigned char addr[ETH_ALEN]; + u16 vid; + bool host_addr; +}; + +enum dsa_standalone_event { + DSA_UC_ADD, + DSA_UC_DEL, + DSA_MC_ADD, + DSA_MC_DEL, +}; + +struct dsa_standalone_event_work { + struct work_struct work; + struct net_device *dev; + enum dsa_standalone_event event; + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +struct dsa_host_vlan_rx_filtering_ctx { + struct net_device *dev; + const unsigned char *addr; + enum dsa_standalone_event event; +}; + +static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_fdb_add && ds->ops->port_fdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + 
!ds->needs_standalone_vlan_filtering; +} + +static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_mdb_add && ds->ops->port_mdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + +static void dsa_user_standalone_event_work(struct work_struct *work) +{ + struct dsa_standalone_event_work *standalone_work = + container_of(work, struct dsa_standalone_event_work, work); + const unsigned char *addr = standalone_work->addr; + struct net_device *dev = standalone_work->dev; + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_mdb mdb; + struct dsa_switch *ds = dp->ds; + u16 vid = standalone_work->vid; + int err; + + switch (standalone_work->event) { + case DSA_UC_ADD: + err = dsa_port_standalone_host_fdb_add(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + + case DSA_UC_DEL: + err = dsa_port_standalone_host_fdb_del(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, addr, vid, err); + } + + break; + case DSA_MC_ADD: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_add(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to mdb: %d\n", + dp->index, addr, vid, err); + break; + } + break; + case DSA_MC_DEL: + ether_addr_copy(mdb.addr, addr); + mdb.vid = vid; + + err = dsa_port_standalone_host_mdb_del(dp, &mdb); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from mdb: %d\n", + dp->index, addr, vid, err); + } + + break; + } + + kfree(standalone_work); +} + +static int dsa_user_schedule_standalone_work(struct net_device *dev, + enum dsa_standalone_event event, + const unsigned char *addr, + u16 vid) +{ + struct dsa_standalone_event_work *standalone_work; + + standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC); + if (!standalone_work) + return -ENOMEM; + + INIT_WORK(&standalone_work->work, dsa_user_standalone_event_work); + standalone_work->event = event; + standalone_work->dev = dev; + + ether_addr_copy(standalone_work->addr, addr); + standalone_work->vid = vid; + + dsa_schedule_work(&standalone_work->work); + + return 0; +} + +static int dsa_user_host_vlan_rx_filtering(void *arg, int vid) +{ + struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; + + return dsa_user_schedule_standalone_work(ctx->dev, ctx->event, + ctx->addr, vid); +} + +static int dsa_user_vlan_for_each(struct net_device *dev, + int (*cb)(void *arg, int vid), void *arg) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_vlan *v; + int err; + + lockdep_assert_held(&dev->addr_list_lock); + + err = cb(arg, 0); + if (err) + return err; + + list_for_each_entry(v, &dp->user_vlans, list) { + err = cb(arg, v->vid); + if (err) + return err; + } + + return 0; +} + +static int dsa_user_sync_uc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_ADD, + }; + + dev_uc_add(conduit, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, + &ctx); +} + +static int dsa_user_unsync_uc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *conduit = 
dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_UC_DEL, + }; + + dev_uc_del(conduit, addr); + + if (!dsa_switch_supports_uc_filtering(dp->ds)) + return 0; + + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, + &ctx); +} + +static int dsa_user_sync_mc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_ADD, + }; + + dev_mc_add(conduit, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, + &ctx); +} + +static int dsa_user_unsync_mc(struct net_device *dev, + const unsigned char *addr) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_host_vlan_rx_filtering_ctx ctx = { + .dev = dev, + .addr = addr, + .event = DSA_MC_DEL, + }; + + dev_mc_del(conduit, addr); + + if (!dsa_switch_supports_mc_filtering(dp->ds)) + return 0; + + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, + &ctx); +} + +void dsa_user_sync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_user_sync_mc(dev, ha->addr); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_user_sync_uc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + +void dsa_user_unsync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_user_unsync_uc(dev, ha->addr); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_user_unsync_mc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + +/* user mii_bus handling ***************************************************/ +static int dsa_user_phy_read(struct mii_bus *bus, int addr, int reg) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->phys_mii_mask & (1 << addr)) + return ds->ops->phy_read(ds, addr, reg); + + return 0xffff; +} + +static int dsa_user_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->phys_mii_mask & (1 << addr)) + return ds->ops->phy_write(ds, addr, reg, val); + + return 0; +} + +void dsa_user_mii_bus_init(struct dsa_switch *ds) +{ + ds->user_mii_bus->priv = (void *)ds; + ds->user_mii_bus->name = "dsa user smi"; + ds->user_mii_bus->read = dsa_user_phy_read; + ds->user_mii_bus->write = dsa_user_phy_write; + snprintf(ds->user_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", + ds->dst->index, ds->index); + ds->user_mii_bus->parent = ds->dev; + ds->user_mii_bus->phy_mask = ~ds->phys_mii_mask; +} + + +/* user device handling ****************************************************/ +static int dsa_user_get_iflink(const struct net_device *dev) +{ + return dsa_user_to_conduit(dev)->ifindex; +} + +static int dsa_user_open(struct net_device *dev) +{ + struct net_device *conduit = 
dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int err; + + err = dev_open(conduit, NULL); + if (err < 0) { + netdev_err(dev, "failed to open conduit %s\n", conduit->name); + goto out; + } + + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0); + if (err) + goto out; + } + + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) { + err = dev_uc_add(conduit, dev->dev_addr); + if (err < 0) + goto del_host_addr; + } + + err = dsa_port_enable_rt(dp, dev->phydev); + if (err) + goto del_unicast; + + return 0; + +del_unicast: + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); +del_host_addr: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); +out: + return err; +} + +static int dsa_user_close(struct net_device *dev) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + dsa_port_disable_rt(dp); + + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); + + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + + return 0; +} + +static void dsa_user_manage_host_flood(struct net_device *dev) +{ + bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI); + struct dsa_port *dp = dsa_user_to_port(dev); + bool uc = dev->flags & IFF_PROMISC; + + dsa_port_set_host_flood(dp, uc, mc); +} + +static void dsa_user_change_rx_flags(struct net_device *dev, int change) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(conduit, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(conduit, + dev->flags & IFF_PROMISC ? 1 : -1); + + if (dsa_switch_supports_uc_filtering(ds) && + dsa_switch_supports_mc_filtering(ds)) + dsa_user_manage_host_flood(dev); +} + +static void dsa_user_set_rx_mode(struct net_device *dev) +{ + __dev_mc_sync(dev, dsa_user_sync_mc, dsa_user_unsync_mc); + __dev_uc_sync(dev, dsa_user_sync_uc, dsa_user_unsync_uc); +} + +static int dsa_user_set_mac_address(struct net_device *dev, void *a) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct sockaddr *addr = a; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (ds->ops->port_set_mac_address) { + err = ds->ops->port_set_mac_address(ds, dp->index, + addr->sa_data); + if (err) + return err; + } + + /* If the port is down, the address isn't synced yet to hardware or + * to the DSA conduit, so there is nothing to change. 
+ */ + if (!(dev->flags & IFF_UP)) + goto out_change_dev_addr; + + if (dsa_switch_supports_uc_filtering(ds)) { + err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0); + if (err) + return err; + } + + if (!ether_addr_equal(addr->sa_data, conduit->dev_addr)) { + err = dev_uc_add(conduit, addr->sa_data); + if (err < 0) + goto del_unicast; + } + + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); + + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + +out_change_dev_addr: + eth_hw_addr_set(dev, addr->sa_data); + + return 0; + +del_unicast: + if (dsa_switch_supports_uc_filtering(ds)) + dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0); + + return err; +} + +struct dsa_user_dump_ctx { + struct net_device *dev; + struct sk_buff *skb; + struct netlink_callback *cb; + int idx; +}; + +static int +dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid, + bool is_static, void *data) +{ + struct dsa_user_dump_ctx *dump = data; + u32 portid = NETLINK_CB(dump->cb->skb).portid; + u32 seq = dump->cb->nlh->nlmsg_seq; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + if (dump->idx < dump->cb->args[2]) + goto skip; + + nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, + sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dump->dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(dump->skb, nlh); + +skip: + dump->idx++; + return 0; + +nla_put_failure: + nlmsg_cancel(dump->skb, nlh); + return -EMSGSIZE; +} + +static int +dsa_user_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, struct net_device *filter_dev, + int *idx) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_dump_ctx dump = { + .dev = dev, + .skb = skb, + .cb = cb, + .idx = *idx, + }; + int err; + + err = dsa_port_fdb_dump(dp, dsa_user_port_fdb_do_dump, &dump); + *idx = dump.idx; + + return err; +} + +static int dsa_user_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct dsa_user_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + int port = p->dp->index; + + /* Pass through to switch driver if it supports timestamping */ + switch (cmd) { + case SIOCGHWTSTAMP: + if (ds->ops->port_hwtstamp_get) + return ds->ops->port_hwtstamp_get(ds, port, ifr); + break; + case SIOCSHWTSTAMP: + if (ds->ops->port_hwtstamp_set) + return ds->ops->port_hwtstamp_set(ds, port, ifr); + break; + } + + return phylink_mii_ioctl(p->dp->pl, ifr, cmd); +} + +static int dsa_user_port_attr_set(struct net_device *dev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + int ret; + + if (ctx && ctx != dp) + return 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_set_state(dp, attr->u.stp_state, true); + break; + case SWITCHDEV_ATTR_ID_PORT_MST_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack); + break; + case 
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, + extack); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MST: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_mst_enable(dp, attr->u.mst, extack); + break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, + extack); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); + break; + case SWITCHDEV_ATTR_ID_VLAN_MSTI: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +/* Must be called under rcu_read_lock() */ +static int +dsa_user_vlan_check_for_8021q_uppers(struct net_device *user, + const struct switchdev_obj_port_vlan *vlan) +{ + struct net_device *upper_dev; + struct list_head *iter; + + netdev_for_each_upper_dev_rcu(user, upper_dev, iter) { + u16 vid; + + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + if (vid == vlan->vid) + return -EBUSY; + } + + return 0; +} + +static int dsa_user_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan *vlan; + int err; + + if (dsa_port_skip_vlan_configuration(dp)) { + NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); + return 0; + } + + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + /* Deny adding a bridge VLAN when there is already an 802.1Q upper with + * the same VID. + */ + if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) { + rcu_read_lock(); + err = dsa_user_vlan_check_for_8021q_uppers(dev, vlan); + rcu_read_unlock(); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Port already has a VLAN upper with this VID"); + return err; + } + } + + return dsa_port_vlan_add(dp, vlan, extack); +} + +/* Offload a VLAN installed on the bridge or on a foreign interface by + * installing it as a VLAN towards the CPU port. + */ +static int dsa_user_host_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan vlan; + + /* Do nothing if this is a software bridge */ + if (!dp->bridge) + return -EOPNOTSUPP; + + if (dsa_port_skip_vlan_configuration(dp)) { + NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); + return 0; + } + + vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); + + /* Even though drivers often handle CPU membership in special ways, + * it doesn't make sense to program a PVID, so clear this flag. 
+ */ + vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; + + return dsa_port_host_vlan_add(dp, &vlan, extack); +} + +static int dsa_user_port_obj_add(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + int err; + + if (ctx && ctx != dp) + return 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_MDB: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + err = dsa_user_vlan_add(dev, obj, extack); + else + err = dsa_user_host_vlan_add(dev, obj, extack); + break; + case SWITCHDEV_OBJ_ID_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj)); + break; + case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_add_ring_role(dp, + SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int dsa_user_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan *vlan; + + if (dsa_port_skip_vlan_configuration(dp)) + return 0; + + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + return dsa_port_vlan_del(dp, vlan); +} + +static int dsa_user_host_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan *vlan; + + /* Do nothing if this is a software bridge */ + if (!dp->bridge) + return -EOPNOTSUPP; + + if (dsa_port_skip_vlan_configuration(dp)) + return 0; + + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + + return dsa_port_host_vlan_del(dp, vlan); +} + +static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + int err; + + if (ctx && ctx != dp) + return 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_MDB: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + err = dsa_user_vlan_del(dev, obj); + else + err = dsa_user_host_vlan_del(dev, obj); + break; + case SWITCHDEV_OBJ_ID_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj)); + break; + case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: + if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) + return -EOPNOTSUPP; + + err = dsa_port_mrp_del_ring_role(dp, + SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + struct dsa_user_priv *p = 
netdev_priv(dev); + + return netpoll_send_skb(p->netpoll, skb); +#else + BUG(); + return NETDEV_TX_OK; +#endif +} + +static void dsa_skb_tx_timestamp(struct dsa_user_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return; + + if (!ds->ops->port_txtstamp) + return; + + ds->ops->port_txtstamp(ds, p->dp->index, skb); +} + +netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) +{ + /* SKB for netpoll still need to be mangled with the protocol-specific + * tag to be successfully transmitted + */ + if (unlikely(netpoll_tx_running(dev))) + return dsa_user_netpoll_send_skb(dev, skb); + + /* Queue the SKB for transmission on the parent interface, but + * do not modify its EtherType + */ + skb->dev = dsa_user_to_conduit(dev); + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} +EXPORT_SYMBOL_GPL(dsa_enqueue_skb); + +static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) +{ + int needed_headroom = dev->needed_headroom; + int needed_tailroom = dev->needed_tailroom; + + /* For tail taggers, we need to pad short frames ourselves, to ensure + * that the tail tag does not fail at its role of being at the end of + * the packet, once the conduit interface pads the frame. Account for + * that pad length here, and pad later. + */ + if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) + needed_tailroom += ETH_ZLEN - skb->len; + /* skb_headroom() returns unsigned int... */ + needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); + needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); + + if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) + /* No reallocation needed, yay! */ + return 0; + + return pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); +} + +static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_user_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + + dev_sw_netstats_tx_add(dev, 1, skb->len); + + memset(skb->cb, 0, sizeof(skb->cb)); + + /* Handle tx timestamp if any */ + dsa_skb_tx_timestamp(p, skb); + + if (dsa_realloc_skb(skb, dev)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* needed_tailroom should still be 'warm' in the cache line from + * dsa_realloc_skb(), which has also ensured that padding is safe. + */ + if (dev->needed_tailroom) + eth_skb_pad(skb); + + /* Transmit function may have to reallocate the original SKB, + * in which case it must have freed it. Only free it here on error. 
+ */ + nskb = p->xmit(skb, dev); + if (!nskb) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + + return dsa_enqueue_skb(nskb, dev); +} + +/* ethtool operations *******************************************************/ + +static void dsa_user_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); +} + +static int dsa_user_get_regs_len(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, dp->index); + + return -EOPNOTSUPP; +} + +static void +dsa_user_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_regs) + ds->ops->get_regs(ds, dp->index, regs, _p); +} + +static int dsa_user_nway_reset(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return phylink_ethtool_nway_reset(dp->pl); +} + +static int dsa_user_get_eeprom_len(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->cd && ds->cd->eeprom_len) + return ds->cd->eeprom_len; + + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); + + return 0; +} + +static int dsa_user_get_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); + + return -EOPNOTSUPP; +} + +static int dsa_user_set_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); + + return -EOPNOTSUPP; +} + +static void dsa_user_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (stringset == ETH_SS_STATS) { + int len = ETH_GSTRING_LEN; + + strscpy_pad(data, "tx_packets", len); + strscpy_pad(data + len, "tx_bytes", len); + strscpy_pad(data + 2 * len, "rx_packets", len); + strscpy_pad(data + 3 * len, "rx_bytes", len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, dp->index, stringset, + data + 4 * len); + } else if (stringset == ETH_SS_TEST) { + net_selftest_get_strings(data); + } + +} + +static void dsa_user_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct pcpu_sw_netstats *s; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + + s = per_cpu_ptr(dev->tstats, i); + do { + start = u64_stats_fetch_begin(&s->syncp); + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); + } while (u64_stats_fetch_retry(&s->syncp, start)); + data[0] += tx_packets; + data[1] += tx_bytes; + data[2] += rx_packets; + data[3] += rx_bytes; + } + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, dp->index, data + 4); +} + +static int 
dsa_user_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (sset == ETH_SS_STATS) { + int count = 0; + + if (ds->ops->get_sset_count) { + count = ds->ops->get_sset_count(ds, dp->index, sset); + if (count < 0) + return count; + } + + return count + 4; + } else if (sset == ETH_SS_TEST) { + return net_selftest_get_count(); + } + + return -EOPNOTSUPP; +} + +static void dsa_user_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_phy_stats) + ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); +} + +static void dsa_user_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_mac_stats) + ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); +} + +static void +dsa_user_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_ctrl_stats) + ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); +} + +static void +dsa_user_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_rmon_stats) + ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); +} + +static void dsa_user_net_selftest(struct net_device *ndev, + struct ethtool_test *etest, u64 *buf) +{ + struct dsa_port *dp = dsa_user_to_port(ndev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->self_test) { + ds->ops->self_test(ds, dp->index, etest, buf); + return; + } + + net_selftest(ndev, etest, buf); +} + +static int dsa_user_get_mm(struct net_device *dev, + struct ethtool_mm_state *state) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->get_mm) + return -EOPNOTSUPP; + + return ds->ops->get_mm(ds, dp->index, state); +} + +static int dsa_user_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->set_mm) + return -EOPNOTSUPP; + + return ds->ops->set_mm(ds, dp->index, cfg, extack); +} + +static void dsa_user_get_mm_stats(struct net_device *dev, + struct ethtool_mm_stats *stats) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_mm_stats) + ds->ops->get_mm_stats(ds, dp->index, stats); +} + +static void dsa_user_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + phylink_ethtool_get_wol(dp->pl, w); + + if (ds->ops->get_wol) + ds->ops->get_wol(ds, dp->index, w); +} + +static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int ret = -EOPNOTSUPP; + + phylink_ethtool_set_wol(dp->pl, w); + + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, dp->index, w); + + return ret; +} + +static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds 
= dp->ds; + int ret; + + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev || !dp->pl) + return -ENODEV; + + if (!ds->ops->set_mac_eee) + return -EOPNOTSUPP; + + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + + return phylink_ethtool_set_eee(dp->pl, e); +} + +static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int ret; + + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev || !dp->pl) + return -ENODEV; + + if (!ds->ops->get_mac_eee) + return -EOPNOTSUPP; + + ret = ds->ops->get_mac_eee(ds, dp->index, e); + if (ret) + return ret; + + return phylink_ethtool_get_eee(dp->pl, e); +} + +static int dsa_user_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return phylink_ethtool_ksettings_get(dp->pl, cmd); +} + +static int dsa_user_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return phylink_ethtool_ksettings_set(dp->pl, cmd); +} + +static void dsa_user_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_pause_stats) + ds->ops->get_pause_stats(ds, dp->index, pause_stats); +} + +static void dsa_user_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + phylink_ethtool_get_pauseparam(dp->pl, pause); +} + +static int dsa_user_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return phylink_ethtool_set_pauseparam(dp->pl, pause); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static int dsa_user_netpoll_setup(struct net_device *dev, + struct netpoll_info *ni) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_user_priv *p = netdev_priv(dev); + struct netpoll *netpoll; + int err = 0; + + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + if (!netpoll) + return -ENOMEM; + + err = __netpoll_setup(netpoll, conduit); + if (err) { + kfree(netpoll); + goto out; + } + + p->netpoll = netpoll; +out: + return err; +} + +static void dsa_user_netpoll_cleanup(struct net_device *dev) +{ + struct dsa_user_priv *p = netdev_priv(dev); + struct netpoll *netpoll = p->netpoll; + + if (!netpoll) + return; + + p->netpoll = NULL; + + __netpoll_free(netpoll); +} + +static void dsa_user_poll_controller(struct net_device *dev) +{ +} +#endif + +static struct dsa_mall_tc_entry * +dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) +{ + struct dsa_user_priv *p = netdev_priv(dev); + struct dsa_mall_tc_entry *mall_tc_entry; + + list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) + if (mall_tc_entry->cookie == cookie) + return mall_tc_entry; + + return NULL; +} + +static int +dsa_user_add_cls_matchall_mirred(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_priv *p = netdev_priv(dev); + struct dsa_mall_mirror_tc_entry *mirror; + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = dp->ds; + struct flow_action_entry *act; + struct dsa_port *to_dp; + int err; + + if (!ds->ops->port_mirror_add) 
+ return -EOPNOTSUPP; + + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) + return -EOPNOTSUPP; + + act = &cls->rule->action.entries[0]; + + if (!act->dev) + return -EINVAL; + + if (!dsa_user_dev_check(act->dev)) + return -EOPNOTSUPP; + + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = DSA_PORT_MALL_MIRROR; + mirror = &mall_tc_entry->mirror; + + to_dp = dsa_user_to_port(act->dev); + + mirror->to_local_port = to_dp->index; + mirror->ingress = ingress; + + err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack); + if (err) { + kfree(mall_tc_entry); + return err; + } + + list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + + return err; +} + +static int +dsa_user_add_cls_matchall_police(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_priv *p = netdev_priv(dev); + struct dsa_mall_policer_tc_entry *policer; + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = dp->ds; + struct flow_action_entry *act; + int err; + + if (!ds->ops->port_policer_add) { + NL_SET_ERR_MSG_MOD(extack, + "Policing offload not implemented"); + return -EOPNOTSUPP; + } + + if (!ingress) { + NL_SET_ERR_MSG_MOD(extack, + "Only supported on ingress qdisc"); + return -EOPNOTSUPP; + } + + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) + return -EOPNOTSUPP; + + list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { + if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) { + NL_SET_ERR_MSG_MOD(extack, + "Only one port policer allowed"); + return -EEXIST; + } + } + + act = &cls->rule->action.entries[0]; + + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = DSA_PORT_MALL_POLICER; + policer = &mall_tc_entry->policer; + policer->rate_bytes_per_sec = act->police.rate_bytes_ps; + policer->burst = act->police.burst; + + err = ds->ops->port_policer_add(ds, dp->index, policer); + if (err) { + kfree(mall_tc_entry); + return err; + } + + list_add_tail(&mall_tc_entry->list, &p->mall_tc_list); + + return err; +} + +static int dsa_user_add_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + int err = -EOPNOTSUPP; + + if (cls->common.protocol == htons(ETH_P_ALL) && + flow_offload_has_one_action(&cls->rule->action) && + cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED) + err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress); + else if (flow_offload_has_one_action(&cls->rule->action) && + cls->rule->action.entries[0].id == FLOW_ACTION_POLICE) + err = dsa_user_add_cls_matchall_police(dev, cls, ingress); + + return err; +} + +static void dsa_user_del_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_mall_tc_entry *mall_tc_entry; + struct dsa_switch *ds = dp->ds; + + mall_tc_entry = dsa_user_mall_tc_entry_find(dev, cls->cookie); + if (!mall_tc_entry) + return; + + list_del(&mall_tc_entry->list); + + switch (mall_tc_entry->type) { + case DSA_PORT_MALL_MIRROR: + if (ds->ops->port_mirror_del) + ds->ops->port_mirror_del(ds, dp->index, + &mall_tc_entry->mirror); + break; + case DSA_PORT_MALL_POLICER: + if (ds->ops->port_policer_del) 
+ ds->ops->port_policer_del(ds, dp->index); + break; + default: + WARN_ON(1); + } + + kfree(mall_tc_entry); +} + +static int dsa_user_setup_tc_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + if (cls->common.chain_index) + return -EOPNOTSUPP; + + switch (cls->command) { + case TC_CLSMATCHALL_REPLACE: + return dsa_user_add_cls_matchall(dev, cls, ingress); + case TC_CLSMATCHALL_DESTROY: + dsa_user_del_cls_matchall(dev, cls); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dsa_user_add_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_add) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_add(ds, port, cls, ingress); +} + +static int dsa_user_del_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_del) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_del(ds, port, cls, ingress); +} + +static int dsa_user_stats_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_stats) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_stats(ds, port, cls, ingress); +} + +static int dsa_user_setup_tc_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + switch (cls->command) { + case FLOW_CLS_REPLACE: + return dsa_user_add_cls_flower(dev, cls, ingress); + case FLOW_CLS_DESTROY: + return dsa_user_del_cls_flower(dev, cls, ingress); + case FLOW_CLS_STATS: + return dsa_user_stats_cls_flower(dev, cls, ingress); + default: + return -EOPNOTSUPP; + } +} + +static int dsa_user_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) +{ + struct net_device *dev = cb_priv; + + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return dsa_user_setup_tc_cls_matchall(dev, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return dsa_user_setup_tc_cls_flower(dev, type_data, ingress); + default: + return -EOPNOTSUPP; + } +} + +static int dsa_user_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, true); +} + +static int dsa_user_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, false); +} + +static LIST_HEAD(dsa_user_block_cb_list); + +static int dsa_user_setup_tc_block(struct net_device *dev, + struct flow_block_offload *f) +{ + struct flow_block_cb *block_cb; + flow_setup_cb_t *cb; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + cb = dsa_user_setup_tc_block_cb_ig; + else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + cb = dsa_user_setup_tc_block_cb_eg; + else + return -EOPNOTSUPP; + + f->driver_block_list = &dsa_user_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(cb, dev, &dsa_user_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + 
list_add_tail(&block_cb->driver_list, &dsa_user_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f->block, cb, dev); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dsa_user_setup_ft_block(struct dsa_switch *ds, int port, + void *type_data) +{ + struct net_device *conduit = dsa_port_to_conduit(dsa_to_port(ds, port)); + + if (!conduit->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + return conduit->netdev_ops->ndo_setup_tc(conduit, TC_SETUP_FT, type_data); +} + +static int dsa_user_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + switch (type) { + case TC_SETUP_BLOCK: + return dsa_user_setup_tc_block(dev, type_data); + case TC_SETUP_FT: + return dsa_user_setup_ft_block(ds, dp->index, type_data); + default: + break; + } + + if (!ds->ops->port_setup_tc) + return -EOPNOTSUPP; + + return ds->ops->port_setup_tc(ds, dp->index, type, type_data); +} + +static int dsa_user_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rule_locs) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->get_rxnfc) + return -EOPNOTSUPP; + + return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); +} + +static int dsa_user_set_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->set_rxnfc) + return -EOPNOTSUPP; + + return ds->ops->set_rxnfc(ds, dp->index, nfc); +} + +static int dsa_user_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *ts) +{ + struct dsa_user_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->get_ts_info) + return -EOPNOTSUPP; + + return ds->ops->get_ts_info(ds, p->dp->index, ts); +} + +static int dsa_user_vlan_rx_add_vid(struct net_device *dev, __be16 proto, + u16 vid) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid = vid, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + struct netlink_ext_ack extack = {0}; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + struct dsa_vlan *v; + int ret; + + /* User port... */ + ret = dsa_port_vlan_add(dp, &vlan, &extack); + if (ret) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return ret; + } + + /* And CPU port... 
*/ + ret = dsa_port_host_vlan_add(dp, &vlan, &extack); + if (ret) { + if (extack._msg) + netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index, + extack._msg); + return ret; + } + + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) { + ret = -ENOMEM; + goto rollback; + } + + netif_addr_lock_bh(dev); + + v->vid = vid; + list_add_tail(&v->list, &dp->user_vlans); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_user_schedule_standalone_work(dev, DSA_MC_ADD, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_user_schedule_standalone_work(dev, DSA_UC_ADD, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + + return 0; + +rollback: + dsa_port_host_vlan_del(dp, &vlan); + dsa_port_vlan_del(dp, &vlan); + + return ret; +} + +static int dsa_user_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, + u16 vid) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct switchdev_obj_port_vlan vlan = { + .vid = vid, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + struct dsa_vlan *v; + int err; + + err = dsa_port_vlan_del(dp, &vlan); + if (err) + return err; + + err = dsa_port_host_vlan_del(dp, &vlan); + if (err) + return err; + + if (!dsa_switch_supports_uc_filtering(ds) && + !dsa_switch_supports_mc_filtering(ds)) + return 0; + + netif_addr_lock_bh(dev); + + v = dsa_vlan_find(&dp->user_vlans, &vlan); + if (!v) { + netif_addr_unlock_bh(dev); + return -ENOENT; + } + + list_del(&v->list); + kfree(v); + + if (dsa_switch_supports_mc_filtering(ds)) { + netdev_for_each_synced_mc_addr(ha, dev) { + dsa_user_schedule_standalone_work(dev, DSA_MC_DEL, + ha->addr, vid); + } + } + + if (dsa_switch_supports_uc_filtering(ds)) { + netdev_for_each_synced_uc_addr(ha, dev) { + dsa_user_schedule_standalone_work(dev, DSA_UC_DEL, + ha->addr, vid); + } + } + + netif_addr_unlock_bh(dev); + + dsa_flush_workqueue(); + + return 0; +} + +static int dsa_user_restore_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_user_vlan_rx_add_vid(arg, proto, vid); +} + +static int dsa_user_clear_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_user_vlan_rx_kill_vid(arg, proto, vid); +} + +/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN + * filtering is enabled. The baseline is that only ports that offload a + * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware, + * but there are exceptions for quirky hardware. + * + * If ds->vlan_filtering_is_global = true, then standalone ports which share + * the same switch with other ports that offload a VLAN-aware bridge are also + * inevitably VLAN-aware. 
+ * + * To summarize, a DSA switch port offloads: + * + * - If standalone (this includes software bridge, software LAG): + * - if ds->needs_standalone_vlan_filtering = true, OR if + * (ds->vlan_filtering_is_global = true AND there are bridges spanning + * this switch chip which have vlan_filtering=1) + * - the 8021q upper VLANs + * - else (standalone VLAN filtering is not needed, VLAN filtering is not + * global, or it is, but no port is under a VLAN-aware bridge): + * - no VLAN (any 8021q upper is a software VLAN) + * + * - If under a vlan_filtering=0 bridge which it offload: + * - if ds->configure_vlan_while_not_filtering = true (default): + * - the bridge VLANs. These VLANs are committed to hardware but inactive. + * - else (deprecated): + * - no VLAN. The bridge VLANs are not restored when VLAN awareness is + * enabled, so this behavior is broken and discouraged. + * + * - If under a vlan_filtering=1 bridge which it offload: + * - the bridge VLANs + * - the 8021q upper VLANs + */ +int dsa_user_manage_vlan_filtering(struct net_device *user, + bool vlan_filtering) +{ + int err; + + if (vlan_filtering) { + user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + err = vlan_for_each(user, dsa_user_restore_vlan, user); + if (err) { + vlan_for_each(user, dsa_user_clear_vlan, user); + user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + return err; + } + } else { + err = vlan_for_each(user, dsa_user_clear_vlan, user); + if (err) + return err; + + user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + } + + return 0; +} + +struct dsa_hw_port { + struct list_head list; + struct net_device *dev; + int old_mtu; +}; + +static int dsa_hw_port_list_set_mtu(struct list_head *hw_port_list, int mtu) +{ + const struct dsa_hw_port *p; + int err; + + list_for_each_entry(p, hw_port_list, list) { + if (p->dev->mtu == mtu) + continue; + + err = dev_set_mtu(p->dev, mtu); + if (err) + goto rollback; + } + + return 0; + +rollback: + list_for_each_entry_continue_reverse(p, hw_port_list, list) { + if (p->dev->mtu == p->old_mtu) + continue; + + if (dev_set_mtu(p->dev, p->old_mtu)) + netdev_err(p->dev, "Failed to restore MTU\n"); + } + + return err; +} + +static void dsa_hw_port_list_free(struct list_head *hw_port_list) +{ + struct dsa_hw_port *p, *n; + + list_for_each_entry_safe(p, n, hw_port_list, list) + kfree(p); +} + +/* Make the hardware datapath to/from @dev limited to a common MTU */ +static void dsa_bridge_mtu_normalization(struct dsa_port *dp) +{ + struct list_head hw_port_list; + struct dsa_switch_tree *dst; + int min_mtu = ETH_MAX_MTU; + struct dsa_port *other_dp; + int err; + + if (!dp->ds->mtu_enforcement_ingress) + return; + + if (!dp->bridge) + return; + + INIT_LIST_HEAD(&hw_port_list); + + /* Populate the list of ports that are part of the same bridge + * as the newly added/modified port + */ + list_for_each_entry(dst, &dsa_tree_list, list) { + list_for_each_entry(other_dp, &dst->ports, list) { + struct dsa_hw_port *hw_port; + struct net_device *user; + + if (other_dp->type != DSA_PORT_TYPE_USER) + continue; + + if (!dsa_port_bridge_same(dp, other_dp)) + continue; + + if (!other_dp->ds->mtu_enforcement_ingress) + continue; + + user = other_dp->user; + + if (min_mtu > user->mtu) + min_mtu = user->mtu; + + hw_port = kzalloc(sizeof(*hw_port), GFP_KERNEL); + if (!hw_port) + goto out; + + hw_port->dev = user; + hw_port->old_mtu = user->mtu; + + list_add(&hw_port->list, &hw_port_list); + } + } + + /* Attempt to configure the entire hardware bridge to the newly added + * interface's MTU first, regardless of whether the 
intention of the + * user was to raise or lower it. + */ + err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->user->mtu); + if (!err) + goto out; + + /* Clearly that didn't work out so well, so just set the minimum MTU on + * all hardware bridge ports now. If this fails too, then all ports will + * still have their old MTU rolled back anyway. + */ + dsa_hw_port_list_set_mtu(&hw_port_list, min_mtu); + +out: + dsa_hw_port_list_free(&hw_port_list); +} + +int dsa_user_change_mtu(struct net_device *dev, int new_mtu) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_port *cpu_dp = dp->cpu_dp; + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + int largest_mtu = 0; + int new_conduit_mtu; + int old_conduit_mtu; + int mtu_limit; + int overhead; + int cpu_mtu; + int err; + + if (!ds->ops->port_change_mtu) + return -EOPNOTSUPP; + + dsa_tree_for_each_user_port(other_dp, ds->dst) { + int user_mtu; + + /* During probe, this function will be called for each user + * device, while not all of them have been allocated. That's + * ok, it doesn't change what the maximum is, so ignore it. + */ + if (!other_dp->user) + continue; + + /* Pretend that we already applied the setting, which we + * actually haven't (still haven't done all integrity checks) + */ + if (dp == other_dp) + user_mtu = new_mtu; + else + user_mtu = other_dp->user->mtu; + + if (largest_mtu < user_mtu) + largest_mtu = user_mtu; + } + + overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); + mtu_limit = min_t(int, conduit->max_mtu, dev->max_mtu + overhead); + old_conduit_mtu = conduit->mtu; + new_conduit_mtu = largest_mtu + overhead; + if (new_conduit_mtu > mtu_limit) + return -ERANGE; + + /* If the conduit MTU isn't over limit, there's no need to check the CPU + * MTU, since that surely isn't either. + */ + cpu_mtu = largest_mtu; + + /* Start applying stuff */ + if (new_conduit_mtu != old_conduit_mtu) { + err = dev_set_mtu(conduit, new_conduit_mtu); + if (err < 0) + goto out_conduit_failed; + + /* We only need to propagate the MTU of the CPU port to + * upstream switches, so emit a notifier which updates them. 
+ */ + err = dsa_port_mtu_change(cpu_dp, cpu_mtu); + if (err) + goto out_cpu_failed; + } + + err = ds->ops->port_change_mtu(ds, dp->index, new_mtu); + if (err) + goto out_port_failed; + + dev->mtu = new_mtu; + + dsa_bridge_mtu_normalization(dp); + + return 0; + +out_port_failed: + if (new_conduit_mtu != old_conduit_mtu) + dsa_port_mtu_change(cpu_dp, old_conduit_mtu - overhead); +out_cpu_failed: + if (new_conduit_mtu != old_conduit_mtu) + dev_set_mtu(conduit, old_conduit_mtu); +out_conduit_failed: + return err; +} + +static int __maybe_unused +dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_add_dscp_prio) + return -EOPNOTSUPP; + + if (dscp >= 64) { + netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", + dscp); + return -EINVAL; + } + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_user_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_user_dcbnl_set_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_user_dcbnl_add_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +static int __maybe_unused +dsa_user_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = mask ? 
__fls(mask) : 0; + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_del_dscp_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_user_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_user_dcbnl_del_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_user_dcbnl_del_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +/* Pre-populate the DCB application priority table with the priorities + * configured during switch setup, which we read from hardware here. + */ +static int dsa_user_dcbnl_init(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_get_default_prio) { + int prio = ds->ops->port_get_default_prio(ds, port); + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + .priority = prio, + }; + + if (prio < 0) + return prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + + if (ds->ops->port_get_dscp_prio) { + int protocol; + + for (protocol = 0; protocol < 64; protocol++) { + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_DSCP, + .protocol = protocol, + }; + int prio; + + prio = ds->ops->port_get_dscp_prio(ds, port, protocol); + if (prio == -EOPNOTSUPP) + continue; + if (prio < 0) + return prio; + + app.priority = prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + } + + return 0; +} + +static const struct ethtool_ops dsa_user_ethtool_ops = { + .get_drvinfo = dsa_user_get_drvinfo, + .get_regs_len = dsa_user_get_regs_len, + .get_regs = dsa_user_get_regs, + .nway_reset = dsa_user_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = dsa_user_get_eeprom_len, + .get_eeprom = dsa_user_get_eeprom, + .set_eeprom = dsa_user_set_eeprom, + .get_strings = dsa_user_get_strings, + .get_ethtool_stats = dsa_user_get_ethtool_stats, + .get_sset_count = dsa_user_get_sset_count, + .get_eth_phy_stats = dsa_user_get_eth_phy_stats, + .get_eth_mac_stats = dsa_user_get_eth_mac_stats, + .get_eth_ctrl_stats = dsa_user_get_eth_ctrl_stats, + .get_rmon_stats = dsa_user_get_rmon_stats, + .set_wol = dsa_user_set_wol, + .get_wol = dsa_user_get_wol, + .set_eee = dsa_user_set_eee, + .get_eee = dsa_user_get_eee, + .get_link_ksettings = dsa_user_get_link_ksettings, + .set_link_ksettings = dsa_user_set_link_ksettings, + .get_pause_stats = dsa_user_get_pause_stats, + .get_pauseparam = dsa_user_get_pauseparam, + .set_pauseparam = dsa_user_set_pauseparam, + .get_rxnfc = dsa_user_get_rxnfc, + .set_rxnfc = dsa_user_set_rxnfc, + .get_ts_info = dsa_user_get_ts_info, + .self_test = dsa_user_net_selftest, + .get_mm = dsa_user_get_mm, + .set_mm = dsa_user_set_mm, + .get_mm_stats = dsa_user_get_mm_stats, +}; + +static const struct dcbnl_rtnl_ops 
__maybe_unused dsa_user_dcbnl_ops = { + .ieee_setapp = dsa_user_dcbnl_ieee_setapp, + .ieee_delapp = dsa_user_dcbnl_ieee_delapp, +}; + +static void dsa_user_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_stats64) + ds->ops->get_stats64(ds, dp->index, s); + else + dev_get_tstats64(dev, s); +} + +static int dsa_user_fill_forward_path(struct net_device_path_ctx *ctx, + struct net_device_path *path) +{ + struct dsa_port *dp = dsa_user_to_port(ctx->dev); + struct net_device *conduit = dsa_port_to_conduit(dp); + struct dsa_port *cpu_dp = dp->cpu_dp; + + path->dev = ctx->dev; + path->type = DEV_PATH_DSA; + path->dsa.proto = cpu_dp->tag_ops->proto; + path->dsa.port = dp->index; + ctx->dev = conduit; + + return 0; +} + +static const struct net_device_ops dsa_user_netdev_ops = { + .ndo_open = dsa_user_open, + .ndo_stop = dsa_user_close, + .ndo_start_xmit = dsa_user_xmit, + .ndo_change_rx_flags = dsa_user_change_rx_flags, + .ndo_set_rx_mode = dsa_user_set_rx_mode, + .ndo_set_mac_address = dsa_user_set_mac_address, + .ndo_fdb_dump = dsa_user_fdb_dump, + .ndo_eth_ioctl = dsa_user_ioctl, + .ndo_get_iflink = dsa_user_get_iflink, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_setup = dsa_user_netpoll_setup, + .ndo_netpoll_cleanup = dsa_user_netpoll_cleanup, + .ndo_poll_controller = dsa_user_poll_controller, +#endif + .ndo_setup_tc = dsa_user_setup_tc, + .ndo_get_stats64 = dsa_user_get_stats64, + .ndo_vlan_rx_add_vid = dsa_user_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = dsa_user_vlan_rx_kill_vid, + .ndo_change_mtu = dsa_user_change_mtu, + .ndo_fill_forward_path = dsa_user_fill_forward_path, +}; + +static struct device_type dsa_type = { + .name = "dsa", +}; + +void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up) +{ + const struct dsa_port *dp = dsa_to_port(ds, port); + + if (dp->pl) + phylink_mac_change(dp->pl, up); +} +EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change); + +static void dsa_user_phylink_fixed_state(struct phylink_config *config, + struct phylink_link_state *state) +{ + struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); + struct dsa_switch *ds = dp->ds; + + /* No need to check that this operation is valid, the callback would + * not be called if it was not. + */ + ds->ops->phylink_fixed_state(ds, dp->index, state); +} + +/* user device setup *******************************************************/ +static int dsa_user_phy_connect(struct net_device *user_dev, int addr, + u32 flags) +{ + struct dsa_port *dp = dsa_user_to_port(user_dev); + struct dsa_switch *ds = dp->ds; + + user_dev->phydev = mdiobus_get_phy(ds->user_mii_bus, addr); + if (!user_dev->phydev) { + netdev_err(user_dev, "no phy at %d\n", addr); + return -ENODEV; + } + + user_dev->phydev->dev_flags |= flags; + + return phylink_connect_phy(dp->pl, user_dev->phydev); +} + +static int dsa_user_phy_setup(struct net_device *user_dev) +{ + struct dsa_port *dp = dsa_user_to_port(user_dev); + struct device_node *port_dn = dp->dn; + struct dsa_switch *ds = dp->ds; + u32 phy_flags = 0; + int ret; + + dp->pl_config.dev = &user_dev->dev; + dp->pl_config.type = PHYLINK_NETDEV; + + /* The get_fixed_state callback takes precedence over polling the + * link GPIO in PHYLINK (see phylink_get_fixed_state). Only set + * this if the switch provides such a callback. 
+ */ + if (ds->ops->phylink_fixed_state) { + dp->pl_config.get_fixed_state = dsa_user_phylink_fixed_state; + dp->pl_config.poll_fixed_state = true; + } + + ret = dsa_port_phylink_create(dp); + if (ret) + return ret; + + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, dp->index); + + ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); + if (ret == -ENODEV && ds->user_mii_bus) { + /* We could not connect to a designated PHY or SFP, so try to + * use the switch internal MDIO bus instead + */ + ret = dsa_user_phy_connect(user_dev, dp->index, phy_flags); + } + if (ret) { + netdev_err(user_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + dsa_port_phylink_destroy(dp); + } + + return ret; +} + +void dsa_user_setup_tagger(struct net_device *user) +{ + struct dsa_port *dp = dsa_user_to_port(user); + struct net_device *conduit = dsa_port_to_conduit(dp); + struct dsa_user_priv *p = netdev_priv(user); + const struct dsa_port *cpu_dp = dp->cpu_dp; + const struct dsa_switch *ds = dp->ds; + + user->needed_headroom = cpu_dp->tag_ops->needed_headroom; + user->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; + /* Try to save one extra realloc later in the TX path (in the conduit) + * by also inheriting the conduit's needed headroom and tailroom. + * The 8021q driver also does this. + */ + user->needed_headroom += conduit->needed_headroom; + user->needed_tailroom += conduit->needed_tailroom; + + p->xmit = cpu_dp->tag_ops->xmit; + + user->features = conduit->vlan_features | NETIF_F_HW_TC; + user->hw_features |= NETIF_F_HW_TC; + user->features |= NETIF_F_LLTX; + if (user->needed_tailroom) + user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); + if (ds->needs_standalone_vlan_filtering) + user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +} + +int dsa_user_suspend(struct net_device *user_dev) +{ + struct dsa_port *dp = dsa_user_to_port(user_dev); + + if (!netif_running(user_dev)) + return 0; + + netif_device_detach(user_dev); + + rtnl_lock(); + phylink_stop(dp->pl); + rtnl_unlock(); + + return 0; +} + +int dsa_user_resume(struct net_device *user_dev) +{ + struct dsa_port *dp = dsa_user_to_port(user_dev); + + if (!netif_running(user_dev)) + return 0; + + netif_device_attach(user_dev); + + rtnl_lock(); + phylink_start(dp->pl); + rtnl_unlock(); + + return 0; +} + +int dsa_user_create(struct dsa_port *port) +{ + struct net_device *conduit = dsa_port_to_conduit(port); + struct dsa_switch *ds = port->ds; + struct net_device *user_dev; + struct dsa_user_priv *p; + const char *name; + int assign_type; + int ret; + + if (!ds->num_tx_queues) + ds->num_tx_queues = 1; + + if (port->name) { + name = port->name; + assign_type = NET_NAME_PREDICTABLE; + } else { + name = "eth%d"; + assign_type = NET_NAME_ENUM; + } + + user_dev = alloc_netdev_mqs(sizeof(struct dsa_user_priv), name, + assign_type, ether_setup, + ds->num_tx_queues, 1); + if (user_dev == NULL) + return -ENOMEM; + + user_dev->rtnl_link_ops = &dsa_link_ops; + user_dev->ethtool_ops = &dsa_user_ethtool_ops; +#if IS_ENABLED(CONFIG_DCB) + user_dev->dcbnl_ops = &dsa_user_dcbnl_ops; +#endif + if (!is_zero_ether_addr(port->mac)) + eth_hw_addr_set(user_dev, port->mac); + else + eth_hw_addr_inherit(user_dev, conduit); + user_dev->priv_flags |= IFF_NO_QUEUE; + if (dsa_switch_supports_uc_filtering(ds)) + user_dev->priv_flags |= IFF_UNICAST_FLT; + user_dev->netdev_ops = &dsa_user_netdev_ops; + if (ds->ops->port_max_mtu) + user_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); + SET_NETDEV_DEVTYPE(user_dev, &dsa_type); + + 
SET_NETDEV_DEV(user_dev, port->ds->dev); + SET_NETDEV_DEVLINK_PORT(user_dev, &port->devlink_port); + user_dev->dev.of_node = port->dn; + user_dev->vlan_features = conduit->vlan_features; + + p = netdev_priv(user_dev); + user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!user_dev->tstats) { + free_netdev(user_dev); + return -ENOMEM; + } + + ret = gro_cells_init(&p->gcells, user_dev); + if (ret) + goto out_free; + + p->dp = port; + INIT_LIST_HEAD(&p->mall_tc_list); + port->user = user_dev; + dsa_user_setup_tagger(user_dev); + + netif_carrier_off(user_dev); + + ret = dsa_user_phy_setup(user_dev); + if (ret) { + netdev_err(user_dev, + "error %d setting up PHY for tree %d, switch %d, port %d\n", + ret, ds->dst->index, ds->index, port->index); + goto out_gcells; + } + + rtnl_lock(); + + ret = dsa_user_change_mtu(user_dev, ETH_DATA_LEN); + if (ret && ret != -EOPNOTSUPP) + dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", + ret, ETH_DATA_LEN, port->index); + + ret = register_netdevice(user_dev); + if (ret) { + netdev_err(conduit, "error %d registering interface %s\n", + ret, user_dev->name); + rtnl_unlock(); + goto out_phy; + } + + if (IS_ENABLED(CONFIG_DCB)) { + ret = dsa_user_dcbnl_init(user_dev); + if (ret) { + netdev_err(user_dev, + "failed to initialize DCB: %pe\n", + ERR_PTR(ret)); + rtnl_unlock(); + goto out_unregister; + } + } + + ret = netdev_upper_dev_link(conduit, user_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + + return 0; + +out_unregister: + unregister_netdev(user_dev); +out_phy: + rtnl_lock(); + phylink_disconnect_phy(p->dp->pl); + rtnl_unlock(); + dsa_port_phylink_destroy(p->dp); +out_gcells: + gro_cells_destroy(&p->gcells); +out_free: + free_percpu(user_dev->tstats); + free_netdev(user_dev); + port->user = NULL; + return ret; +} + +void dsa_user_destroy(struct net_device *user_dev) +{ + struct net_device *conduit = dsa_user_to_conduit(user_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); + struct dsa_user_priv *p = netdev_priv(user_dev); + + netif_carrier_off(user_dev); + rtnl_lock(); + netdev_upper_dev_unlink(conduit, user_dev); + unregister_netdevice(user_dev); + phylink_disconnect_phy(dp->pl); + rtnl_unlock(); + + dsa_port_phylink_destroy(dp); + gro_cells_destroy(&p->gcells); + free_percpu(user_dev->tstats); + free_netdev(user_dev); +} + +int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, + struct netlink_ext_ack *extack) +{ + struct net_device *old_conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct net_device *upper; + struct list_head *iter; + int err; + + if (conduit == old_conduit) + return 0; + + if (!ds->ops->port_change_conduit) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support changing DSA conduit"); + return -EOPNOTSUPP; + } + + if (!netdev_uses_dsa(conduit)) { + NL_SET_ERR_MSG_MOD(extack, + "Interface not eligible as DSA conduit"); + return -EOPNOTSUPP; + } + + netdev_for_each_upper_dev_rcu(conduit, upper, iter) { + if (dsa_user_dev_check(upper)) + continue; + if (netif_is_bridge_master(upper)) + continue; + NL_SET_ERR_MSG_MOD(extack, "Cannot join conduit with unknown uppers"); + return -EOPNOTSUPP; + } + + /* Since we allow live-changing the DSA conduit, plus we auto-open the + * DSA conduit when the user port opens => we need to ensure that the + * new DSA conduit is open too. 
+ */ + if (dev->flags & IFF_UP) { + err = dev_open(conduit, extack); + if (err) + return err; + } + + netdev_upper_dev_unlink(old_conduit, dev); + + err = netdev_upper_dev_link(conduit, dev, extack); + if (err) + goto out_revert_old_conduit_unlink; + + err = dsa_port_change_conduit(dp, conduit, extack); + if (err) + goto out_revert_conduit_link; + + /* Update the MTU of the new CPU port through cross-chip notifiers */ + err = dsa_user_change_mtu(dev, dev->mtu); + if (err && err != -EOPNOTSUPP) { + netdev_warn(dev, + "nonfatal error updating MTU with new conduit: %pe\n", + ERR_PTR(err)); + } + + /* If the port doesn't have its own MAC address and relies on the DSA + * conduit's one, inherit it again from the new DSA conduit. + */ + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, conduit); + + return 0; + +out_revert_conduit_link: + netdev_upper_dev_unlink(conduit, dev); +out_revert_old_conduit_unlink: + netdev_upper_dev_link(old_conduit, dev, NULL); + return err; +} + +bool dsa_user_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &dsa_user_netdev_ops; +} +EXPORT_SYMBOL_GPL(dsa_user_dev_check); + +static int dsa_user_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + + if (!dsa_user_dev_check(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_bridge_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_bridge_join(dp, info->upper_dev, extack); + if (!err) + dsa_bridge_mtu_normalization(dp); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_WEAK_MOD(extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_bridge_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev, + info->upper_info, extack); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_WEAK_MOD(extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } else if (is_hsr_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_hsr_join(dp, info->upper_dev, extack); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_WEAK_MOD(extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_hsr_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int dsa_user_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + if (!dsa_user_dev_check(dev)) + return NOTIFY_DONE; + + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + dsa_port_pre_bridge_leave(dp, info->upper_dev); + else if (netif_is_lag_master(info->upper_dev) && !info->linking) + dsa_port_pre_lag_leave(dp, info->upper_dev); + /* dsa_port_pre_hsr_leave is not yet necessary since hsr devices cannot + * meaningfully placed under a bridge yet + */ + + return NOTIFY_DONE; +} + +static int +dsa_user_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + if (!netif_is_lag_master(dev)) + return err; + + netdev_for_each_lower_dev(dev, lower, iter) { + if 
(!dsa_user_dev_check(lower)) + continue; + + dp = dsa_user_to_port(lower); + if (!dp->lag) + /* Software LAG */ + continue; + + err = dsa_user_changeupper(lower, info); + if (notifier_to_errno(err)) + break; + } + + return err; +} + +/* Same as dsa_user_lag_changeupper() except that it calls + * dsa_user_prechangeupper() + */ +static int +dsa_user_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + if (!netif_is_lag_master(dev)) + return err; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_user_dev_check(lower)) + continue; + + dp = dsa_user_to_port(lower); + if (!dp->lag) + /* Software LAG */ + continue; + + err = dsa_user_prechangeupper(lower, info); + if (notifier_to_errno(err)) + break; + } + + return err; +} + +static int +dsa_prevent_bridging_8021q_upper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *ext_ack; + struct net_device *user, *br; + struct dsa_port *dp; + + ext_ack = netdev_notifier_info_to_extack(&info->info); + + if (!is_vlan_dev(dev)) + return NOTIFY_DONE; + + user = vlan_dev_real_dev(dev); + if (!dsa_user_dev_check(user)) + return NOTIFY_DONE; + + dp = dsa_user_to_port(user); + br = dsa_port_bridge_dev_get(dp); + if (!br) + return NOTIFY_DONE; + + /* Deny enslaving a VLAN device into a VLAN-aware bridge */ + if (br_vlan_enabled(br) && + netif_is_bridge_master(info->upper_dev) && info->linking) { + NL_SET_ERR_MSG_MOD(ext_ack, + "Cannot make VLAN device join VLAN-aware bridge"); + return notifier_from_errno(-EINVAL); + } + + return NOTIFY_DONE; +} + +static int +dsa_user_check_8021q_upper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct bridge_vlan_info br_info; + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + u16 vid; + + if (!br || !br_vlan_enabled(br)) + return NOTIFY_DONE; + + extack = netdev_notifier_info_to_extack(&info->info); + vid = vlan_dev_vlan_id(info->upper_dev); + + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the + * device, respectively the VID is not found, returning + * 0 means success, which is a failure for us here. + */ + err = br_vlan_get_info(br, vid, &br_info); + if (err == 0) { + NL_SET_ERR_MSG_MOD(extack, + "This VLAN is already configured by the bridge"); + return notifier_from_errno(-EBUSY); + } + + return NOTIFY_DONE; +} + +static int +dsa_user_prechangeupper_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int err; + + if (!dsa_user_dev_check(dev)) + return dsa_prevent_bridging_8021q_upper(dev, info); + + dp = dsa_user_to_port(dev); + ds = dp->ds; + + if (ds->ops->port_prechangeupper) { + err = ds->ops->port_prechangeupper(ds, dp->index, info); + if (err) + return notifier_from_errno(err); + } + + if (is_vlan_dev(info->upper_dev)) + return dsa_user_check_8021q_upper(dev, info); + + return NOTIFY_DONE; +} + +/* To be eligible as a DSA conduit, a LAG must have all lower interfaces be + * eligible DSA conduits. Additionally, all LAG slaves must be DSA conduits of + * switches in the same switch tree. 
+ */ +static int dsa_lag_conduit_validate(struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *lower1, *lower2; + struct list_head *iter1, *iter2; + + netdev_for_each_lower_dev(lag_dev, lower1, iter1) { + netdev_for_each_lower_dev(lag_dev, lower2, iter2) { + if (!netdev_uses_dsa(lower1) || + !netdev_uses_dsa(lower2)) { + NL_SET_ERR_MSG_MOD(extack, + "All LAG ports must be eligible as DSA conduits"); + return notifier_from_errno(-EINVAL); + } + + if (lower1 == lower2) + continue; + + if (!dsa_port_tree_same(lower1->dsa_ptr, + lower2->dsa_ptr)) { + NL_SET_ERR_MSG_MOD(extack, + "LAG contains DSA conduits of disjoint switch trees"); + return notifier_from_errno(-EINVAL); + } + } + } + + return NOTIFY_DONE; +} + +static int +dsa_conduit_prechangeupper_sanity_check(struct net_device *conduit, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + + if (!netdev_uses_dsa(conduit)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + /* Allow DSA switch uppers */ + if (dsa_user_dev_check(info->upper_dev)) + return NOTIFY_DONE; + + /* Allow bridge uppers of DSA conduits, subject to further + * restrictions in dsa_bridge_prechangelower_sanity_check() + */ + if (netif_is_bridge_master(info->upper_dev)) + return NOTIFY_DONE; + + /* Allow LAG uppers, subject to further restrictions in + * dsa_lag_conduit_prechangelower_sanity_check() + */ + if (netif_is_lag_master(info->upper_dev)) + return dsa_lag_conduit_validate(info->upper_dev, extack); + + NL_SET_ERR_MSG_MOD(extack, + "DSA conduit cannot join unknown upper interfaces"); + return notifier_from_errno(-EBUSY); +} + +static int +dsa_lag_conduit_prechangelower_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + struct net_device *lag_dev = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) { + NL_SET_ERR_MSG(extack, + "Only DSA conduits can join a LAG DSA conduit"); + return notifier_from_errno(-EINVAL); + } + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { + NL_SET_ERR_MSG(extack, + "Interface is DSA conduit for a different switch tree than this LAG"); + return notifier_from_errno(-EINVAL); + } + + break; + } + + return NOTIFY_DONE; +} + +/* Don't allow bridging of DSA conduits, since the bridge layer rx_handler + * prevents the DSA fake ethertype handler to be invoked, so we don't get the + * chance to strip off and parse the DSA switch tag protocol header (the bridge + * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these + * frames). + * The only case where that would not be an issue is when bridging can already + * be offloaded, such as when the DSA conduit is itself a DSA or plain switchdev + * port, and is bridged only with other ports from the same hardware device. 
+ */ +static int +dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *br = info->upper_dev; + struct netlink_ext_ack *extack; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(br)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + extack = netdev_notifier_info_to_extack(&info->info); + + netdev_for_each_lower_dev(br, lower, iter) { + if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower)) + continue; + + if (!netdev_port_same_parent_id(lower, new_lower)) { + NL_SET_ERR_MSG(extack, + "Cannot do software bridging with a DSA conduit"); + return notifier_from_errno(-EINVAL); + } + } + + return NOTIFY_DONE; +} + +static void dsa_tree_migrate_ports_from_lag_conduit(struct dsa_switch_tree *dst, + struct net_device *lag_dev) +{ + struct net_device *new_conduit = dsa_tree_find_first_conduit(dst); + struct dsa_port *dp; + int err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_conduit(dp) != lag_dev) + continue; + + err = dsa_user_change_conduit(dp->user, new_conduit, NULL); + if (err) { + netdev_err(dp->user, + "failed to restore conduit to %s: %pe\n", + new_conduit->name, ERR_PTR(err)); + } + } +} + +static int dsa_conduit_lag_join(struct net_device *conduit, + struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + struct dsa_port *cpu_dp = conduit->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + int err; + + err = dsa_conduit_lag_setup(lag_dev, cpu_dp, uinfo, extack); + if (err) + return err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_conduit(dp) != conduit) + continue; + + err = dsa_user_change_conduit(dp->user, lag_dev, extack); + if (err) + goto restore; + } + + return 0; + +restore: + dsa_tree_for_each_user_port_continue_reverse(dp, dst) { + if (dsa_port_to_conduit(dp) != lag_dev) + continue; + + err = dsa_user_change_conduit(dp->user, conduit, NULL); + if (err) { + netdev_err(dp->user, + "failed to restore conduit to %s: %pe\n", + conduit->name, ERR_PTR(err)); + } + } + + dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); + + return err; +} + +static void dsa_conduit_lag_leave(struct net_device *conduit, + struct net_device *lag_dev) +{ + struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *new_cpu_dp = NULL; + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (netdev_uses_dsa(lower)) { + new_cpu_dp = lower->dsa_ptr; + break; + } + } + + if (new_cpu_dp) { + /* Update the CPU port of the user ports still under the LAG + * so that dsa_port_to_conduit() continues to work properly + */ + dsa_tree_for_each_user_port(dp, dst) + if (dsa_port_to_conduit(dp) == lag_dev) + dp->cpu_dp = new_cpu_dp; + + /* Update the index of the virtual CPU port to match the lowest + * physical CPU port + */ + lag_dev->dsa_ptr = new_cpu_dp; + wmb(); + } else { + /* If the LAG DSA conduit has no ports left, migrate back all + * user ports to the first physical CPU port + */ + dsa_tree_migrate_ports_from_lag_conduit(dst, lag_dev); + } + + /* This DSA conduit has left its LAG in any case, so let + * the CPU port leave the hardware LAG as well + */ + dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); +} + +static int dsa_conduit_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack 
*extack; + int err = NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_conduit_lag_join(dev, info->upper_dev, + info->upper_info, extack); + err = notifier_from_errno(err); + } else { + dsa_conduit_lag_leave(dev, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int dsa_user_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_PRECHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; + int err; + + err = dsa_user_prechangeupper_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_conduit_prechangeupper_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_lag_conduit_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_bridge_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + + err = dsa_user_prechangeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_user_lag_prechangeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + break; + } + case NETDEV_CHANGEUPPER: { + int err; + + err = dsa_user_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_user_lag_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + err = dsa_conduit_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + + break; + } + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err = 0; + + if (dsa_user_dev_check(dev)) { + dp = dsa_user_to_port(dev); + + err = dsa_port_lag_change(dp, info->lower_state_info); + } + + /* Mirror LAG port events on DSA conduits that are in + * a LAG towards their respective switch CPU ports + */ + if (netdev_uses_dsa(dev)) { + dp = dev->dsa_ptr; + + err = dsa_port_lag_change(dp, info->lower_state_info); + } + + return notifier_from_errno(err); + } + case NETDEV_CHANGE: + case NETDEV_UP: { + /* Track state of conduit port. + * DSA driver may require the conduit port (and indirectly + * the tagger) to be available for some special operation. + */ + if (netdev_uses_dsa(dev)) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->ds->dst; + + /* Track when the conduit port is UP */ + dsa_tree_conduit_oper_state_change(dst, dev, + netif_oper_up(dev)); + + /* Track when the conduit port is ready and can accept + * packet. + * NETDEV_UP event is not enough to flag a port as ready. + * We also have to wait for linkwatch_do_dev to dev_activate + * and emit a NETDEV_CHANGE event. + * We check if a conduit port is ready by checking if the dev + * have a qdisc assigned and is not noop. 
+ */ + dsa_tree_conduit_admin_state_change(dst, dev, + !qdisc_tx_is_noop(dev)); + + return NOTIFY_OK; + } + + return NOTIFY_DONE; + } + case NETDEV_GOING_DOWN: { + struct dsa_port *dp, *cpu_dp; + struct dsa_switch_tree *dst; + LIST_HEAD(close_list); + + if (!netdev_uses_dsa(dev)) + return NOTIFY_DONE; + + cpu_dp = dev->dsa_ptr; + dst = cpu_dp->ds->dst; + + dsa_tree_conduit_admin_state_change(dst, dev, false); + + list_for_each_entry(dp, &dst->ports, list) { + if (!dsa_port_is_user(dp)) + continue; + + if (dp->cpu_dp != cpu_dp) + continue; + + list_add(&dp->user->close_list, &close_list); + } + + dev_close_many(&close_list, true); + + return NOTIFY_OK; + } + default: + break; + } + + return NOTIFY_DONE; +} + +static void +dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) +{ + struct switchdev_notifier_fdb_info info = {}; + + info.addr = switchdev_work->addr; + info.vid = switchdev_work->vid; + info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, + switchdev_work->orig_dev, &info.info, NULL); +} + +static void dsa_user_switchdev_event_work(struct work_struct *work) +{ + struct dsa_switchdev_event_work *switchdev_work = + container_of(work, struct dsa_switchdev_event_work, work); + const unsigned char *addr = switchdev_work->addr; + struct net_device *dev = switchdev_work->dev; + u16 vid = switchdev_work->vid; + struct dsa_switch *ds; + struct dsa_port *dp; + int err; + + dp = dsa_user_to_port(dev); + ds = dp->ds; + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + if (switchdev_work->host_addr) + err = dsa_port_bridge_host_fdb_add(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_add(dp, addr, vid); + else + err = dsa_port_fdb_add(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to add %pM vid %d to fdb: %d\n", + dp->index, addr, vid, err); + break; + } + dsa_fdb_offload_notify(switchdev_work); + break; + + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (switchdev_work->host_addr) + err = dsa_port_bridge_host_fdb_del(dp, addr, vid); + else if (dp->lag) + err = dsa_port_lag_fdb_del(dp, addr, vid); + else + err = dsa_port_fdb_del(dp, addr, vid); + if (err) { + dev_err(ds->dev, + "port %d failed to delete %pM vid %d from fdb: %d\n", + dp->index, addr, vid, err); + } + + break; + } + + kfree(switchdev_work); +} + +static bool dsa_foreign_dev_check(const struct net_device *dev, + const struct net_device *foreign_dev) +{ + const struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch_tree *dst = dp->ds->dst; + + if (netif_is_bridge_master(foreign_dev)) + return !dsa_tree_offloads_bridge_dev(dst, foreign_dev); + + if (netif_is_bridge_port(foreign_dev)) + return !dsa_tree_offloads_bridge_port(dst, foreign_dev); + + /* Everything else is foreign */ + return true; +} + +static int dsa_user_fdb_event(struct net_device *dev, + struct net_device *orig_dev, + unsigned long event, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + struct dsa_switchdev_event_work *switchdev_work; + struct dsa_port *dp = dsa_user_to_port(dev); + bool host_addr = fdb_info->is_local; + struct dsa_switch *ds = dp->ds; + + if (ctx && ctx != dp) + return 0; + + if (!dp->bridge) + return 0; + + if (switchdev_fdb_is_dynamically_learned(fdb_info)) { + if (dsa_port_offloads_bridge_port(dp, orig_dev)) + return 0; + + /* FDB entries learned by the software bridge or by foreign + * bridge ports should be installed as host addresses only if + * the driver requests assisted learning. 
+ */ + if (!ds->assisted_learning_on_cpu_port) + return 0; + } + + /* Also treat FDB entries on foreign interfaces bridged with us as host + * addresses. + */ + if (dsa_foreign_dev_check(dev, orig_dev)) + host_addr = true; + + /* Check early that we're not doing work in vain. + * Host addresses on LAG ports still require regular FDB ops, + * since the CPU port isn't in a LAG. + */ + if (dp->lag && !host_addr) { + if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del) + return -EOPNOTSUPP; + } else { + if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) + return -EOPNOTSUPP; + } + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return -ENOMEM; + + netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", + event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", + orig_dev->name, fdb_info->addr, fdb_info->vid, + host_addr ? " as host address" : ""); + + INIT_WORK(&switchdev_work->work, dsa_user_switchdev_event_work); + switchdev_work->event = event; + switchdev_work->dev = dev; + switchdev_work->orig_dev = orig_dev; + + ether_addr_copy(switchdev_work->addr, fdb_info->addr); + switchdev_work->vid = fdb_info->vid; + switchdev_work->host_addr = host_addr; + + dsa_schedule_work(&switchdev_work->work); + + return 0; +} + +/* Called under rcu_read_lock() */ +static int dsa_user_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + dsa_user_dev_check, + dsa_user_port_attr_set); + return notifier_from_errno(err); + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = switchdev_handle_fdb_event_to_device(dev, event, ptr, + dsa_user_dev_check, + dsa_foreign_dev_check, + dsa_user_fdb_event); + return notifier_from_errno(err); + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static int dsa_user_switchdev_blocking_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + err = switchdev_handle_port_obj_add_foreign(dev, ptr, + dsa_user_dev_check, + dsa_foreign_dev_check, + dsa_user_port_obj_add); + return notifier_from_errno(err); + case SWITCHDEV_PORT_OBJ_DEL: + err = switchdev_handle_port_obj_del_foreign(dev, ptr, + dsa_user_dev_check, + dsa_foreign_dev_check, + dsa_user_port_obj_del); + return notifier_from_errno(err); + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + dsa_user_dev_check, + dsa_user_port_attr_set); + return notifier_from_errno(err); + } + + return NOTIFY_DONE; +} + +static struct notifier_block dsa_user_nb __read_mostly = { + .notifier_call = dsa_user_netdevice_event, +}; + +struct notifier_block dsa_user_switchdev_notifier = { + .notifier_call = dsa_user_switchdev_event, +}; + +struct notifier_block dsa_user_switchdev_blocking_notifier = { + .notifier_call = dsa_user_switchdev_blocking_event, +}; + +int dsa_user_register_notifier(void) +{ + struct notifier_block *nb; + int err; + + err = register_netdevice_notifier(&dsa_user_nb); + if (err) + return err; + + err = register_switchdev_notifier(&dsa_user_switchdev_notifier); + if (err) + goto err_switchdev_nb; + + nb = &dsa_user_switchdev_blocking_notifier; + err = register_switchdev_blocking_notifier(nb); + if (err) + goto err_switchdev_blocking_nb; + + return 0; + 
+err_switchdev_blocking_nb: + unregister_switchdev_notifier(&dsa_user_switchdev_notifier); +err_switchdev_nb: + unregister_netdevice_notifier(&dsa_user_nb); + return err; +} + +void dsa_user_unregister_notifier(void) +{ + struct notifier_block *nb; + int err; + + nb = &dsa_user_switchdev_blocking_notifier; + err = unregister_switchdev_blocking_notifier(nb); + if (err) + pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err); + + err = unregister_switchdev_notifier(&dsa_user_switchdev_notifier); + if (err) + pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); + + err = unregister_netdevice_notifier(&dsa_user_nb); + if (err) + pr_err("DSA: failed to unregister user notifier (%d)\n", err); +} diff --git a/net/dsa/user.h b/net/dsa/user.h new file mode 100644 index 000000000000..996069130bea --- /dev/null +++ b/net/dsa/user.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_USER_H +#define __DSA_USER_H + +#include +#include +#include +#include +#include +#include +#include + +struct net_device; +struct netlink_ext_ack; + +extern struct notifier_block dsa_user_switchdev_notifier; +extern struct notifier_block dsa_user_switchdev_blocking_notifier; + +struct dsa_user_priv { + /* Copy of CPU port xmit for faster access in user transmit hot path */ + struct sk_buff * (*xmit)(struct sk_buff *skb, + struct net_device *dev); + + struct gro_cells gcells; + + /* DSA port data, such as switch, port index, etc. */ + struct dsa_port *dp; + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif + + /* TC context */ + struct list_head mall_tc_list; +}; + +void dsa_user_mii_bus_init(struct dsa_switch *ds); +int dsa_user_create(struct dsa_port *dp); +void dsa_user_destroy(struct net_device *user_dev); +int dsa_user_suspend(struct net_device *user_dev); +int dsa_user_resume(struct net_device *user_dev); +int dsa_user_register_notifier(void); +void dsa_user_unregister_notifier(void); +void dsa_user_sync_ha(struct net_device *dev); +void dsa_user_unsync_ha(struct net_device *dev); +void dsa_user_setup_tagger(struct net_device *user); +int dsa_user_change_mtu(struct net_device *dev, int new_mtu); +int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, + struct netlink_ext_ack *extack); +int dsa_user_manage_vlan_filtering(struct net_device *dev, + bool vlan_filtering); + +static inline struct dsa_port *dsa_user_to_port(const struct net_device *dev) +{ + struct dsa_user_priv *p = netdev_priv(dev); + + return p->dp; +} + +static inline struct net_device * +dsa_user_to_conduit(const struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return dsa_port_to_conduit(dp); +} + +#endif -- cgit v1.2.3 From 87cd83714f30ef2f19f0390e98beb8d78e173f0f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 23 Oct 2023 11:17:29 -0700 Subject: net: dsa: Rename IFLA_DSA_MASTER to IFLA_DSA_CONDUIT This preserves the existing IFLA_DSA_MASTER which is part of the uAPI and creates an alias named IFLA_DSA_CONDUIT. 
Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20231023181729.1191071-3-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- Documentation/networking/dsa/configuration.rst | 4 ++-- include/uapi/linux/if_link.h | 4 +++- net/dsa/netlink.c | 10 +++++----- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst index e6c9719874b0..6cc4ded3cc23 100644 --- a/Documentation/networking/dsa/configuration.rst +++ b/Documentation/networking/dsa/configuration.rst @@ -393,7 +393,7 @@ description which has an ``ethernet`` property. It is up to the user to configure the system for the switch to use other conduits. DSA uses the ``rtnl_link_ops`` mechanism (with a "dsa" ``kind``) to allow -changing the DSA conduit of a user port. The ``IFLA_DSA_MASTER`` u32 netlink +changing the DSA conduit of a user port. The ``IFLA_DSA_CONDUIT`` u32 netlink attribute contains the ifindex of the conduit device that handles each user device. The DSA conduit must be a valid candidate based on firmware node information, or a LAG interface which contains only slaves which are valid @@ -435,7 +435,7 @@ Using iproute2, the following manipulations are possible: dsa master bond0 Notice that in the case of CPU ports under a LAG, the use of the -``IFLA_DSA_MASTER`` netlink attribute is not strictly needed, but rather, DSA +``IFLA_DSA_CONDUIT`` netlink attribute is not strictly needed, but rather, DSA reacts to the ``IFLA_MASTER`` attribute change of its present conduit (``eth0``) and migrates all user ports to the new upper of ``eth0``, ``bond0``. Similarly, when ``bond0`` is destroyed using ``RTM_DELLINK``, DSA migrates the user ports diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9f8a3da0f14f..f4191be137a4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1394,7 +1394,9 @@ enum { enum { IFLA_DSA_UNSPEC, - IFLA_DSA_MASTER, + IFLA_DSA_CONDUIT, + /* Deprecated, use IFLA_DSA_CONDUIT instead */ + IFLA_DSA_MASTER = IFLA_DSA_CONDUIT, __IFLA_DSA_MAX, }; diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c index f56f90a25b99..1332e56349e5 100644 --- a/net/dsa/netlink.c +++ b/net/dsa/netlink.c @@ -8,7 +8,7 @@ #include "user.h" static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { - [IFLA_DSA_MASTER] = { .type = NLA_U32 }, + [IFLA_DSA_CONDUIT] = { .type = NLA_U32 }, }; static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], @@ -20,8 +20,8 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], if (!data) return 0; - if (data[IFLA_DSA_MASTER]) { - u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); + if (data[IFLA_DSA_CONDUIT]) { + u32 ifindex = nla_get_u32(data[IFLA_DSA_CONDUIT]); struct net_device *conduit; conduit = __dev_get_by_index(dev_net(dev), ifindex); @@ -38,7 +38,7 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], static size_t dsa_get_size(const struct net_device *dev) { - return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */ + return nla_total_size(sizeof(u32)) + /* IFLA_DSA_CONDUIT */ 0; } @@ -46,7 +46,7 @@ static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); - if (nla_put_u32(skb, IFLA_DSA_MASTER, conduit->ifindex)) + if (nla_put_u32(skb, IFLA_DSA_CONDUIT, conduit->ifindex)) return -EMSGSIZE; return 0; -- cgit v1.2.3 
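For illustration only (not part of this change): because the old name is kept as an alias, userspace written against either enumerator emits the exact same netlink attribute. A minimal sketch, assuming uapi headers new enough to contain the rename:

/* illustrative sketch, not from the patch */
#include <stdio.h>
#include <linux/if_link.h>

int main(void)
{
	/* IFLA_DSA_MASTER is an alias of IFLA_DSA_CONDUIT, so old and new
	 * sources put the same attribute type on the wire. */
	printf("IFLA_DSA_CONDUIT=%d IFLA_DSA_MASTER=%d\n",
	       IFLA_DSA_CONDUIT, IFLA_DSA_MASTER);
	return IFLA_DSA_CONDUIT == IFLA_DSA_MASTER ? 0 : 1;
}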
From 70f06c115bcca26ceeebf938e48bc8143668e38b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Oct 2023 13:05:51 +0200 Subject: sched: act_ct: switch to per-action label counting net->ct.labels_used was meant to convey 'number of ip/nftables rules that need the label extension allocated'. act_ct enables this for each net namespace, which voids all attempts to avoid ct->ext allocation when possible. Move this increment to the control plane to request label extension space allocation only when its needed. Signed-off-by: Florian Westphal Reviewed-by: Pedro Tammela Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ct.h | 1 + net/sched/act_ct.c | 41 ++++++++++++++++++----------------------- 2 files changed, 19 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index b24ea2d9400b..8a6dbfb23336 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -22,6 +22,7 @@ struct tcf_ct_params { struct nf_nat_range2 range; bool ipv4_range; + bool put_labels; u16 ct_action; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7c652d14528b..43b06cb284ce 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -690,7 +690,6 @@ static struct tc_action_ops act_ct_ops; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ - bool labels; }; /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ @@ -829,8 +828,13 @@ static void tcf_ct_params_free(struct tcf_ct_params *params) } if (params->ct_ft) tcf_ct_flow_table_put(params->ct_ft); - if (params->tmpl) + if (params->tmpl) { + if (params->put_labels) + nf_connlabels_put(nf_ct_net(params->tmpl)); + nf_ct_put(params->tmpl); + } + kfree(params); } @@ -1154,9 +1158,9 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; int err, family, proto, len; + bool put_labels = false; struct nf_conn *tmpl; char *name; @@ -1186,15 +1190,20 @@ static int tcf_ct_fill_params(struct net *net, } if (tb[TCA_CT_LABELS]) { + unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); return -EOPNOTSUPP; } - if (!tn->labels) { + if (nf_connlabels_get(net, n_bits - 1)) { NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); return -EOPNOTSUPP; + } else { + put_labels = true; } + tcf_ct_set_key_val(tb, p->labels, TCA_CT_LABELS, p->labels_mask, TCA_CT_LABELS_MASK, @@ -1238,10 +1247,15 @@ static int tcf_ct_fill_params(struct net *net, } } + p->put_labels = put_labels; + if (p->ct_action & TCA_CT_ACT_COMMIT) __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; err: + if (put_labels) + nf_connlabels_put(net); + nf_ct_put(p->tmpl); p->tmpl = NULL; return err; @@ -1542,32 +1556,13 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { - unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - if (nf_connlabels_get(net, n_bits - 1)) { - tn->labels = false; - pr_err("act_ct: Failed to set connlabels length"); - } else { - tn->labels = true; - } - return tc_action_net_init(net, &tn->tn, &act_ct_ops); } static void __net_exit ct_exit_net(struct list_head *net_list) { - struct net *net; - - rtnl_lock(); - 
list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - - if (tn->labels) - nf_connlabels_put(net); - } - rtnl_unlock(); - tc_action_net_exit(net_list, act_ct_ops.net_id); } -- cgit v1.2.3 From d866ae9aaa4325f1097e8b7a50f202348ca89b87 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:34 -0700 Subject: mptcp: add a new sysctl for make after break timeout The MPTCP protocol allows sockets with no alive subflows to stay in ESTABLISHED status for and user-defined timeout, to allow for later subflows creation. Currently such timeout is constant - TCP_TIMEWAIT_LEN. Let the user-space configure them via a newly added sysctl, to better cope with busy servers and simplify (make them faster) the relevant pktdrill tests. Note that the new know does not apply to orphaned MPTCP socket waiting for the data_fin handshake completion: they always wait TCP_TIMEWAIT_LEN. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-1-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp-sysctl.rst | 11 +++++++++++ net/mptcp/ctrl.c | 16 ++++++++++++++++ net/mptcp/protocol.c | 6 +++--- net/mptcp/protocol.h | 1 + 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 15f1919d640c..69975ce25a02 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -25,6 +25,17 @@ add_addr_timeout - INTEGER (seconds) Default: 120 +close_timeout - INTEGER (seconds) + Set the make-after-break timeout: in absence of any close or + shutdown syscall, MPTCP sockets will maintain the status + unchanged for such time, after the last subflow removal, before + moving to TCP_CLOSE. + + The default value matches TCP_TIMEWAIT_LEN. This is a per-namespace + sysctl. + + Default: 60 + checksum_enabled - BOOLEAN Control whether DSS checksum can be enabled. 
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index e72b518c5d02..13fe0748dde8 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -27,6 +27,7 @@ struct mptcp_pernet { #endif unsigned int add_addr_timeout; + unsigned int close_timeout; unsigned int stale_loss_cnt; u8 mptcp_enabled; u8 checksum_enabled; @@ -65,6 +66,13 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net) return mptcp_get_pernet(net)->stale_loss_cnt; } +unsigned int mptcp_close_timeout(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_DEAD)) + return TCP_TIMEWAIT_LEN; + return mptcp_get_pernet(sock_net(sk))->close_timeout; +} + int mptcp_get_pm_type(const struct net *net) { return mptcp_get_pernet(net)->pm_type; @@ -79,6 +87,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; @@ -141,6 +150,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dostring, }, + { + .procname = "close_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, {} }; @@ -163,6 +178,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[4].data = &pernet->stale_loss_cnt; table[5].data = &pernet->pm_type; table[6].data = &pernet->scheduler; + table[7].data = &pernet->close_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 886ab689a8ae..a21f8ed26343 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2391,8 +2391,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (msk->in_accept_queue && msk->first == ssk && (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ - mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); + mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1)); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); goto out_release; @@ -2516,7 +2516,7 @@ static bool mptcp_close_tout_expired(const struct sock *sk) return false; return time_after32(tcp_jiffies32, - inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); + inet_csk(sk)->icsk_mtup.probe_timestamp + mptcp_close_timeout(sk)); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2659,7 +2659,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) return; close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + - TCP_TIMEWAIT_LEN; + mptcp_close_timeout(sk); /* the close timeout takes precedence on the fail one, and here at least one of * them is active diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c4c05afdc48c..40228b55e3e9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -615,6 +615,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); +unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); void mptcp_subflow_fully_established(struct 
mptcp_subflow_context *subflow, -- cgit v1.2.3 From bf0e96108fb6707613dd055aff5e98b02b99bb14 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:35 -0700 Subject: mptcp: properly account fastopen data Currently the socket level counter aggregating the received data does not take in account the data received via fastopen. Address the issue updating the counter as required. Fixes: 38967f424b5b ("mptcp: track some aggregate data counters") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-2-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/fastopen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index bceaab8dd8e4..74698582a285 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -52,6 +52,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); + mptcp_sk(sk)->bytes_received += skb->len; sk->sk_data_ready(sk); -- cgit v1.2.3 From f1f26512a9bf18f7a4c0d59df113a49f39d7d4b6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:36 -0700 Subject: mptcp: use plain bool instead of custom binary enum The 'data_avail' subflow field is already used as plain boolean, drop the custom binary enum type and switch to bool. No functional changed intended. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-3-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 7 +------ net/mptcp/subflow.c | 12 ++++++------ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 40228b55e3e9..6df62a8a73bc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -434,11 +434,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk) return (struct mptcp_subflow_request_sock *)rsk; } -enum mptcp_data_avail { - MPTCP_SUBFLOW_NODATA, - MPTCP_SUBFLOW_DATA_AVAIL, -}; - struct mptcp_delegated_action { struct napi_struct napi; struct list_head head; @@ -494,7 +489,7 @@ struct mptcp_subflow_context { valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ __unused : 9; - enum mptcp_data_avail data_avail; + bool data_avail; bool scheduled; u32 remote_nonce; u64 thmac; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9c1f8d1d63d2..dbc7a52b322f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1237,7 +1237,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); if (subflow->data_avail) return true; @@ -1271,7 +1271,7 @@ static bool subflow_check_data_avail(struct sock *ssk) continue; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); break; } return true; @@ -1293,7 +1293,7 @@ fallback: goto reset; } mptcp_subflow_fail(msk, ssk); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1310,7 +1310,7 @@ reset: while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, 
MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); return false; } @@ -1322,7 +1322,7 @@ reset: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1334,7 +1334,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, -- cgit v1.2.3 From 5684ab1a0effbfeb706f47d85785f653005b97b1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:38 -0700 Subject: mptcp: give rcvlowat some love The MPTCP protocol allow setting sk_rcvlowat, but the value there is currently ignored. Additionally, the default subflows sk_rcvlowat basically disables per subflow delayed ack: the MPTCP protocol move the incoming data from the subflows into the msk socket as soon as the TCP stacks invokes the subflow data_ready callback. Later, when __tcp_ack_snd_check() takes action, the subflow-level copied_seq matches rcv_nxt, and that mandate for an immediate ack. Let the mptcp receive path be aware of such threshold, explicitly tracking the amount of data available to be ready and checking vs sk_rcvlowat in mptcp_poll() and before waking-up readers. Additionally implement the set_rcvlowat() callback, to properly handle the rcvbuf auto-tuning on sk_rcvlowat changes. Finally to properly handle delayed ack, force the subflow level threshold to 0 and instead explicitly ask for an immediate ack when the msk level th is not reached. 
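For illustration only (not from this patch), a minimal userspace sketch of how the new threshold is observed; the 16 KiB value and the fallback IPPROTO_MPTCP definition are just example choices:

/* illustrative sketch: SO_RCVLOWAT on an MPTCP socket */
#include <stdio.h>
#include <poll.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
	int lowat = 16 * 1024;	/* wake the reader only at >= 16 KiB */
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	struct pollfd pfd;

	if (fd < 0) {
		perror("socket");	/* e.g. kernel without CONFIG_MPTCP */
		return 1;
	}
	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat)))
		perror("setsockopt(SO_RCVLOWAT)");

	/* after connect() and data exchange, poll() reports POLLIN only once
	 * at least `lowat` bytes are queued at the msk level, instead of on
	 * every subflow data arrival */
	pfd.fd = fd;
	pfd.events = POLLIN;
	poll(&pfd, 1, 0);
	return 0;
}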
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-5-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 24 +++++++++++------------- net/mptcp/protocol.h | 20 ++++++++++++++++++++ net/mptcp/sockopt.c | 42 ++++++++++++++++++++++++++++++++++++++++++ net/mptcp/subflow.c | 12 ++++++++++-- 4 files changed, 83 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a21f8ed26343..7036e30c449f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -863,9 +863,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk)) + if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) sk->sk_data_ready(sk); - mptcp_data_unlock(sk); } @@ -1922,6 +1921,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, if (!(flags & MSG_PEEK)) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; + msk->bytes_consumed += count; } break; } @@ -1932,6 +1932,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); __skb_unlink(skb, &msk->receive_queue); __kfree_skb(skb); + msk->bytes_consumed += count; } if (copied >= len) @@ -2755,6 +2756,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->rmem_fwd_alloc = 0; WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; + msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -2964,16 +2966,9 @@ void __mptcp_unaccepted_force_close(struct sock *sk) __mptcp_destroy_sock(sk); } -static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +static __poll_t mptcp_check_readable(struct sock *sk) { - /* Concurrent splices from sk_receive_queue into receive_queue will - * always show at least one non-empty queue when checked in this order. - */ - if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && - skb_queue_empty_lockless(&msk->receive_queue)) - return 0; - - return EPOLLIN | EPOLLRDNORM; + return mptcp_epollin_ready(sk) ? 
EPOLLIN | EPOLLRDNORM : 0; } static void mptcp_check_listen_stop(struct sock *sk) @@ -3011,7 +3006,7 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_check_readable(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0) { /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ @@ -3138,6 +3133,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->snd_data_fin_enable = false; msk->rcv_fastclose = false; msk->use_64bit_ack = false; + msk->bytes_consumed = 0; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); @@ -3909,7 +3905,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { - mask |= mptcp_check_readable(msk); + mask |= mptcp_check_readable(sk); if (shutdown & SEND_SHUTDOWN) mask |= EPOLLOUT | EPOLLWRNORM; else @@ -3947,6 +3943,7 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, + .set_rcvlowat = mptcp_set_rcvlowat, }; static struct inet_protosw mptcp_protosw = { @@ -4048,6 +4045,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif + .set_rcvlowat = mptcp_set_rcvlowat, }; static struct proto mptcp_v6_prot; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6df62a8a73bc..5971d34a3dee 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -269,6 +269,7 @@ struct mptcp_sock { atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; u64 bytes_retrans; + u64 bytes_consumed; int rmem_fwd_alloc; int snd_burst; int old_wspace; @@ -659,6 +660,24 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); +static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed); +} + +static inline bool mptcp_epollin_ready(const struct sock *sk) +{ + /* mptcp doesn't have to deal with small skbs in the receive queue, + * at it can always coalesce them + */ + return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) || + READ_ONCE(tcp_memory_pressure); +} + +int mptcp_set_rcvlowat(struct sock *sk, int val); + static inline bool __tcp_can_send(const struct sock *ssk) { /* only send if our side has not closed yet */ @@ -733,6 +752,7 @@ static inline bool mptcp_is_fully_established(struct sock *sk) return inet_sk_state_load(sk) == TCP_ESTABLISHED && READ_ONCE(mptcp_sk(sk)->fully_established); } + void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 59bd5e114392..d15891e23f45 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1472,9 +1472,51 @@ void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) msk_owned_by_me(msk); + ssk->sk_rcvlowat = 0; + if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { sync_socket_options(msk, ssk); subflow->setsockopt_seq = msk->setsockopt_seq; } } + +/* unfortunately this is different enough from the tcp version so + * 
that we can't factor it out + */ +int mptcp_set_rcvlowat(struct sock *sk, int val) +{ + struct mptcp_subflow_context *subflow; + int space, cap; + + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + cap = sk->sk_rcvbuf >> 1; + else + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + val = min(val, cap); + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); + + /* Check if we need to signal EPOLLIN right now */ + if (mptcp_epollin_ready(sk)) + sk->sk_data_ready(sk); + + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + return 0; + + space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); + if (space <= sk->sk_rcvbuf) + return 0; + + /* propagate the rcvbuf changes to all the subflows */ + WRITE_ONCE(sk->sk_rcvbuf, space); + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + slow = lock_sock_fast(ssk); + WRITE_ONCE(ssk->sk_rcvbuf, space); + tcp_sk(ssk)->window_clamp = val; + unlock_sock_fast(ssk, slow); + } + return 0; +} diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index dbc7a52b322f..080b16426222 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1405,10 +1405,18 @@ static void subflow_data_ready(struct sock *sk) WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && !subflow->mp_join && !(state & TCPF_CLOSE)); - if (mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) { mptcp_data_ready(parent, sk); - else if (unlikely(sk->sk_err)) + + /* subflow-level lowat test are not relevant. + * respect the msk-level threshold eventually mandating an immediate ack + */ + if (mptcp_data_avail(msk) < parent->sk_rcvlowat && + (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } else if (unlikely(sk->sk_err)) { subflow_error_report(sk); + } } static void subflow_write_space(struct sock *ssk) -- cgit v1.2.3 From 0ffe8e74904027aa48d1f8bd52675c6f0a4c88d1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:39 -0700 Subject: mptcp: use copy_from_iter helpers on transmit The perf traces show an high cost for the MPTCP transmit path memcpy. It turn out that the helper currently in use carries quite a bit of unneeded overhead, e.g. to map/unmap the memory pages. Moving to the 'copy_from_iter' variant removes such overhead and additionally gains the no-cache support. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-6-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7036e30c449f..5489f024dd7e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1760,6 +1760,18 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return ret; } +static int do_copy_data_nocache(struct sock *sk, int copy, + struct iov_iter *from, char *to) +{ + if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { + if (!copy_from_iter_full_nocache(to, copy, from)) + return -EFAULT; + } else if (!copy_from_iter_full(to, copy, from)) { + return -EFAULT; + } + return 0; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1833,11 +1845,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!sk_wmem_schedule(sk, total_ts)) goto wait_for_memory; - if (copy_page_from_iter(dfrag->page, offset, psize, - &msg->msg_iter) != psize) { - ret = -EFAULT; + ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, + page_address(dfrag->page) + offset); + if (ret) goto do_error; - } /* data successfully copied into the write queue */ sk_forward_alloc_add(sk, -total_ts); -- cgit v1.2.3 From a1ab24e5fc4a3048a1b3a24ab0ddc7b73358baa9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:40 -0700 Subject: mptcp: consolidate sockopt synchronization Move the socket option synchronization for active subflows at subflow creation time. This allows removing the now unused unlocked variant of such helper. While at that, clean-up a bit the mptcp_subflow_create_socket() errors path. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-7-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 -- net/mptcp/sockopt.c | 22 ---------------------- net/mptcp/subflow.c | 18 +++++++++--------- 3 files changed, 9 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5489f024dd7e..e44a3da12b96 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -121,8 +121,6 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk) ret = __mptcp_socket_create(msk); if (ret) return ERR_PTR(ret); - - mptcp_sockopt_sync(msk, msk->first); } return msk->first; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d15891e23f45..abf0645cb65d 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1444,28 +1444,6 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); } -static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) -{ - bool slow = lock_sock_fast(ssk); - - sync_socket_options(msk, ssk); - - unlock_sock_fast(ssk, slow); -} - -void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - - msk_owned_by_me(msk); - - if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { - __mptcp_sockopt_sync(msk, ssk); - - subflow->setsockopt_seq = msk->setsockopt_seq; - } -} - void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 080b16426222..df208666fd19 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1533,8 +1533,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - mptcp_sockopt_sync(msk, ssk); - ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1645,7 +1643,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, err = security_mptcp_add_subflow(sk, sf->sk); if (err) - goto release_ssk; + goto err_free; /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1659,15 +1657,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + if (err) + goto err_free; -release_ssk: + mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); - if (err) { - sock_release(sf); - return err; - } - /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. 
Adjust inode references, so that the @@ -1687,6 +1682,11 @@ release_ssk: mptcp_subflow_ops_override(sf->sk); return 0; + +err_free: + release_sock(sf->sk); + sock_release(sf); + return err; } static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, -- cgit v1.2.3 From 9fdc779331bd3e0b2570c2bced396d53a04a33b9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:41 -0700 Subject: mptcp: ignore notsent_lowat setting at the subflow level Any latency related tuning taking action at the subflow level does not really affect the user-space, as only the main MPTCP socket is relevant. Anyway any limiting setting may foul the MPTCP scheduler, not being able to fully use the subflow-level cwin, leading to very poor b/w usage. Enforce notsent_lowat to be a no-op on every subflow. Note that TCP_NOTSENT_LOWAT is currently not supported, and properly dealing with that will require more invasive changes. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-8-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index abf0645cb65d..72858d7d8974 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1452,6 +1452,12 @@ void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_rcvlowat = 0; + /* subflows must ignore any latency-related settings: will not affect + * the user-space - only the msk is relevant - but will foul the + * mptcp scheduler + */ + tcp_sk(ssk)->notsent_lowat = UINT_MAX; + if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { sync_socket_options(msk, ssk); -- cgit v1.2.3 From 8005184fd1ca6aeb3fea36f4eb9463fc1b90c114 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:42 -0700 Subject: mptcp: refactor sndbuf auto-tuning The MPTCP protocol account for the data enqueued on all the subflows to the main socket send buffer, while the send buffer auto-tuning algorithm set the main socket send buffer size as the max size among the subflows. That causes bad performances when at least one subflow is sndbuf limited, e.g. due to very high latency, as the MPTCP scheduler can't even fill such buffer. Change the send-buffer auto-tuning algorithm to compute the main socket send buffer size as the sum of all the subflows buffer size. 
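As a toy illustration (numbers invented, not from the patch): with three subflows whose send buffers auto-tuned to 4 MB, 256 KB and 64 KB, the old policy sets the msk sndbuf to the maximum, 4 MB, so the scheduler stalls as soon as the largest subflow alone has a full buffer; the new policy starts from tcp_wmem[0] and adds each subflow's buffer, roughly 4.3 MB, leaving room for every subflow to keep its own buffer full.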
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-9-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 18 ++++++++++++++++-- net/mptcp/protocol.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++----- net/mptcp/sockopt.c | 5 ++++- net/mptcp/subflow.c | 3 +-- 4 files changed, 70 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e44a3da12b96..1dacc072dcca 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -890,6 +890,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); + __mptcp_propagate_sndbuf(sk, ssk); return true; } @@ -1076,15 +1077,16 @@ static void mptcp_enter_memory_pressure(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); bool first = true; - sk_stream_moderate_sndbuf(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (first) tcp_enter_memory_pressure(ssk); sk_stream_moderate_sndbuf(ssk); + first = false; } + __mptcp_sync_sndbuf(sk); } /* ensure we get enough memory for the frag hdr, beyond some minimal amount of @@ -2458,6 +2460,7 @@ out_release: WRITE_ONCE(msk->first, NULL); out: + __mptcp_sync_sndbuf(sk); if (need_push) __mptcp_push_pending(sk, 0); @@ -3224,7 +3227,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(nsk, ssk); - mptcp_propagate_sndbuf(nsk, ssk); + __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); @@ -3402,6 +3405,8 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); + if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) + __mptcp_sync_sndbuf(sk); } __mptcp_update_rmem(sk); @@ -3446,6 +3451,14 @@ void mptcp_subflow_process_delegated(struct sock *ssk, long status) __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } + if (status & BIT(MPTCP_DELEGATE_SNDBUF)) { + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_sync_sndbuf(sk); + else + __set_bit(MPTCP_SYNC_SNDBUF, &mptcp_sk(sk)->cb_flags); + mptcp_data_unlock(sk); + } if (status & BIT(MPTCP_DELEGATE_ACK)) schedule_3rdack_retransmission(ssk); } @@ -3530,6 +3543,7 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { mptcp_subflow_joined(msk, ssk); + mptcp_propagate_sndbuf(parent, ssk); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5971d34a3dee..9092fcf18798 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -125,6 +125,7 @@ #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { u64 map_seq; @@ -445,6 +446,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); #define MPTCP_DELEGATE_SCHEDULED 0 #define MPTCP_DELEGATE_SEND 1 #define MPTCP_DELEGATE_ACK 2 +#define MPTCP_DELEGATE_SNDBUF 3 #define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED)) /* MPTCP subflow context */ @@ -518,6 +520,9 @@ struct mptcp_subflow_context { u32 setsockopt_seq; u32 stale_rcv_tstamp; + int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf, + * protected 
by the msk socket lock + */ struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ @@ -780,13 +785,52 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } -static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +static inline void __mptcp_sync_sndbuf(struct sock *sk) { - if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) - return false; + struct mptcp_subflow_context *subflow; + int ssk_sndbuf, new_sndbuf; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0]; + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); + + subflow->cached_sndbuf = ssk_sndbuf; + new_sndbuf += ssk_sndbuf; + } + + /* the msk max wmem limit is * tcp wmem[2] */ + WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); +} + +/* The called held both the msk socket and the subflow socket locks, + * possibly under BH + */ +static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf) + __mptcp_sync_sndbuf(sk); +} + +/* the caller held only the subflow socket lock, either in process or + * BH context. Additionally this can be called under the msk data lock, + * so we can't acquire such lock here: let the delegate action acquires + * the needed locks in suitable order. + */ +static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf)) + return; - WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); - return true; + local_bh_disable(); + mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF); + local_bh_enable(); } static inline void mptcp_write_space(struct sock *sk) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 72858d7d8974..574e221bb765 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -95,6 +95,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in case SO_SNDBUFFORCE: ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; break; case SO_RCVBUF: case SO_RCVBUFFORCE: @@ -1415,8 +1416,10 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; - if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; + } if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index df208666fd19..2b43577f952e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -421,6 +421,7 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_set_connected(struct sock *sk) { + __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); if (sk->sk_state == TCP_SYN_SENT) { inet_sk_state_store(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -472,7 +473,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; msk = mptcp_sk(parent); - 
mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -1736,7 +1736,6 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(msk, sk); pr_fallback(msk); -- cgit v1.2.3 From e57a34478586fe3562560ccebd655b707a5b4a56 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 24 Oct 2023 07:26:33 -0700 Subject: ipv6: drop feature RTAX_FEATURE_ALLFRAG RTAX_FEATURE_ALLFRAG was added before the first git commit: https://www.mail-archive.com/bk-commits-head@vger.kernel.org/msg03399.html The feature would send packets to the fragmentation path if a box receives a PMTU value with less than 1280 byte. However, since commit 9d289715eb5c ("ipv6: stop sending PTB packets for MTU < 1280"), such message would be simply discarded. The feature flag is neither supported in iproute2 utility. In theory one can still manipulate it with direct netlink message, but it is not ideal because it was based on obsoleted guidance of RFC-2460 (replaced by RFC-8200). The feature would always test false at the moment, so remove related code or mark them as unused. Signed-off-by: Yan Zhai Reviewed-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/d78e44dcd9968a252143ffe78460446476a472a1.1698156966.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 7 ------- include/net/inet_connection_sock.h | 1 - include/net/inet_sock.h | 1 - include/uapi/linux/rtnetlink.h | 2 +- net/ipv4/tcp_output.c | 20 +------------------- net/ipv6/ip6_output.c | 15 ++------------- net/ipv6/tcp_ipv6.c | 1 - net/ipv6/xfrm6_output.c | 2 +- net/mptcp/subflow.c | 1 - 9 files changed, 5 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index f8b8599a0600..f5dfc8fb7b37 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline u32 -dst_allfrag(const struct dst_entry *dst) -{ - int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); - return ret; -} - static inline int dst_metric_locked(const struct dst_entry *dst, int metric) { diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 086d1193c9ef..d0a2f827d5f2 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops { struct request_sock *req_unhash, bool *own_req); u16 net_header_len; - u16 net_frag_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 98e11958cdff..74db6d97cae1 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,7 +244,6 @@ struct inet_sock { }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index aa2482a0614a..3b687d20c9ed 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -505,7 +505,7 @@ enum { #define RTAX_FEATURE_ECN (1 << 0) #define RTAX_FEATURE_SACK (1 << 1) /* unused */ #define RTAX_FEATURE_TIMESTAMP 
(1 << 2) /* unused */ -#define RTAX_FEATURE_ALLFRAG (1 << 3) +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2866ccbccde0..ca4d7594efd4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1698,14 +1698,6 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) */ mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); - /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ - if (icsk->icsk_af_ops->net_frag_header_len) { - const struct dst_entry *dst = __sk_dst_get(sk); - - if (dst && dst_allfrag(dst)) - mss_now -= icsk->icsk_af_ops->net_frag_header_len; - } - /* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; @@ -1733,21 +1725,11 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - int mtu; - mtu = mss + + return mss + tp->tcp_header_len + icsk->icsk_ext_hdr_len + icsk->icsk_af_ops->net_header_len; - - /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ - if (icsk->icsk_af_ops->net_frag_header_len) { - const struct dst_entry *dst = __sk_dst_get(sk); - - if (dst && dst_allfrag(dst)) - mtu += icsk->icsk_af_ops->net_frag_header_len; - } - return mtu; } EXPORT_SYMBOL(tcp_mss_to_mtu); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3c7de89d6755..86efd901ee5a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -191,7 +191,6 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); if ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); else @@ -1017,9 +1016,6 @@ slow_path: return err; fail_toobig: - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_gso_disable(skb->sk); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; @@ -1384,10 +1380,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - if (dst_allfrag(xfrm_dst_path(&rt->dst))) - cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; - cork->base.transmit_time = ipc6->sockc.transmit_time; return 0; @@ -1444,8 +1437,6 @@ static int __ip6_append_data(struct sock *sk, headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; if (mtu <= fragheaderlen || @@ -1555,7 +1546,7 @@ emsgsize: while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; + copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -1586,7 +1577,7 @@ alloc_new_skb: */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + if (datalen > (cork->length <= mtu ? 
mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; fraglen = datalen + fragheaderlen; pagedlen = 0; @@ -1835,7 +1826,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) struct dst_entry *dst = cork->base.dst; cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; skb_dst_set(skb, dst); } @@ -1856,7 +1846,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, if (cork->base.dst) { dst_release(cork->base.dst); cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0c8a14ba104f..dc27988512a6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1895,7 +1895,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), - .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index ad07904642ca..5f7b1fdbffe6 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -95,7 +95,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return -EMSGSIZE; } - if (toobig || dst_allfrag(skb_dst(skb))) + if (toobig) return ip6_fragment(net, sk, skb, __xfrm6_output_finish); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2b43577f952e..e120e9616454 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2051,7 +2051,6 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; - subflow_v6m_specific.net_frag_header_len = 0; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; tcpv6_prot_override = tcpv6_prot; -- cgit v1.2.3 From 1f7ec1b3721d7f49f13d01e6f5ed5f28a305e3b6 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 24 Oct 2023 07:26:37 -0700 Subject: ipv6: refactor ip6_finish_output for GSO handling Separate GSO and non-GSO packets handling to make the logic cleaner. For GSO packets, frag_max_size check can be omitted because it is only useful for packets defragmented by netfilter hooks. Both local output and GRO logic won't produce GSO packets when defragment is needed. This also mirrors what IPv4 side code is doing. 
Suggested-by: Florian Westphal Signed-off-by: Yan Zhai Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/0e1d4599f858e2becff5c4fe0b5f843236bc3fe8.1698156966.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 86efd901ee5a..4010dd97aaf8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -172,6 +172,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, return ret; } +static int ip6_finish_output_gso(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + return ip6_finish_output2(net, sk, skb); +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -185,16 +195,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #endif mtu = ip6_skb_dst_mtu(skb); - if (skb_is_gso(skb) && - !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && - !skb_gso_validate_network_len(skb, mtu)) - return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + if (skb_is_gso(skb)) + return ip6_finish_output_gso(net, sk, skb, mtu); - if ((skb->len > mtu && !skb_is_gso(skb)) || + if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); - else - return ip6_finish_output2(net, sk, skb); + + return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 03d6c848bfb406e9ef6d9846d759e97beaeea113 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Tue, 24 Oct 2023 07:26:40 -0700 Subject: ipv6: avoid atomic fragment on GSO packets When the ipv6 stack output a GSO packet, if its gso_size is larger than dst MTU, then all segments would be fragmented. However, it is possible for a GSO packet to have a trailing segment with smaller actual size than both gso_size as well as the MTU, which leads to an "atomic fragment". Atomic fragments are considered harmful in RFC-8021. An Existing report from APNIC also shows that atomic fragments are more likely to be dropped even it is equivalent to a no-op [1]. Add an extra check in the GSO slow output path. For each segment from the original over-sized packet, if it fits with the path MTU, then avoid generating an atomic fragment. Link: https://www.potaroo.net/presentations/2022-03-01-ipv6-frag.pdf [1] Fixes: b210de4f8c97 ("net: ipv6: Validate GSO SKB before finish IPv6 processing") Reported-by: David Wragg Signed-off-by: Yan Zhai Link: https://lore.kernel.org/r/90912e3503a242dca0bc36958b11ed03a2696e5e.1698156966.git.yan@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4010dd97aaf8..a722a43dd668 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -164,7 +164,13 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, int err; skb_mark_not_on_list(segs); - err = ip6_fragment(net, sk, segs, ip6_finish_output2); + /* Last GSO segment can be smaller than gso_size (and MTU). 
+ * Adding a fragment header would produce an "atomic fragment", + * which is considered harmful (RFC-8021). Avoid that. + */ + err = segs->len > mtu ? + ip6_fragment(net, sk, segs, ip6_finish_output2) : + ip6_finish_output2(net, sk, segs); if (err && ret == 0) ret = err; } -- cgit v1.2.3 From bfbf81b31093e0dc3d61b390a9bd0904d3bf5374 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 24 Oct 2023 15:23:07 -0600 Subject: net: ipv6/addrconf: clamp preferred_lft to the maximum allowed Without this patch, there is nothing to stop the preferred lifetime of a temporary address from being greater than its valid lifetime. If that was the case, the valid lifetime was effectively ignored. Signed-off-by: Alex Henrie Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231024212312.299370-2-alexhenrie24@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c2d471ad7922..26aedaab3647 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1399,6 +1399,7 @@ retry: idev->cnf.temp_valid_lft + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft); + cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; -- cgit v1.2.3 From 629df6701c8a9172f4274af6de9dfa99e2c7ac56 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 24 Oct 2023 15:23:08 -0600 Subject: net: ipv6/addrconf: clamp preferred_lft to the minimum required If the preferred lifetime was less than the minimum required lifetime, ipv6_create_tempaddr would error out without creating any new address. On my machine and network, this error happened immediately with the preferred lifetime set to 1 second, after a few minutes with the preferred lifetime set to 4 seconds, and not at all with the preferred lifetime set to 5 seconds. During my investigation, I found a Stack Exchange post from another person who seems to have had the same problem: They stopped getting new addresses if they lowered the preferred lifetime below 3 seconds, and they didn't really know why. The preferred lifetime is a preference, not a hard requirement. The kernel does not strictly forbid new connections on a deprecated address, nor does it guarantee that the address will be disposed of the instant its total valid lifetime expires. So rather than disable IPv6 privacy extensions altogether if the minimum required lifetime swells above the preferred lifetime, it is more in keeping with the user's intent to increase the temporary address's lifetime to the minimum necessary for the current network conditions. With these fixes, setting the preferred lifetime to 3 or 4 seconds "just works" because the extra fraction of a second is practically unnoticeable. It's even possible to reduce the time before deprecation to 1 or 2 seconds by also disabling duplicate address detection (setting /proc/sys/net/ipv6/conf/*/dad_transmits to 0). I realize that that is a pretty niche use case, but I know at least one person who would gladly sacrifice performance and convenience to be sure that they are getting the maximum possible level of privacy. 
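For illustration only (interface name and values are examples, not mandated by this patch), a small sketch of the short-lifetime configuration described above:

/* illustrative sketch: request short-lived temporary addresses on eth0 */
#include <stdio.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	write_sysctl("/proc/sys/net/ipv6/conf/eth0/use_tempaddr", "2");
	write_sysctl("/proc/sys/net/ipv6/conf/eth0/temp_prefered_lft", "4");
	write_sysctl("/proc/sys/net/ipv6/conf/eth0/temp_valid_lft", "60");
	/* optionally trade DAD for an even shorter time before deprecation */
	write_sysctl("/proc/sys/net/ipv6/conf/eth0/dad_transmits", "0");
	return 0;
}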
Link: https://serverfault.com/a/1031168/310447 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231024212312.299370-3-alexhenrie24@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 26aedaab3647..3aaea56b5166 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1407,15 +1407,23 @@ retry: write_unlock_bh(&idev->lock); - /* A temporary address is created only if this calculated Preferred - * Lifetime is greater than REGEN_ADVANCE time units. In particular, - * an implementation must not create a temporary address with a zero - * Preferred Lifetime. + /* From RFC 4941: + * + * A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. In + * particular, an implementation must not create a temporary address + * with a zero Preferred Lifetime. + * + * Clamp the preferred lifetime to a minimum of regen_advance, unless + * that would exceed valid_lft. + * * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. */ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; - if (cfg.preferred_lft <= regen_advance + age) { + if (cfg.preferred_lft <= regen_advance + age) + cfg.preferred_lft = regen_advance + age + 1; + if (cfg.preferred_lft > cfg.valid_lft) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; -- cgit v1.2.3 From ea23fbd2a8f7dadfa9cd9b9d73f3b8a69eec0671 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Oct 2023 09:22:04 -0700 Subject: netlink: make range pointers in policies const struct nla_policy is usually constant itself, but unless we make the ranges inside constant we won't be able to make range structs const. The ranges are not modified by the core. 
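For reference, such a range struct is referenced from a policy through NLA_POLICY_FULL_RANGE() (or NLA_POLICY_FULL_RANGE_SIGNED() for signed ranges); a minimal sketch with made-up attribute names:

/* Hypothetical example: MY_ATTR_* and my_burst_range are placeholders,
 * only meant to show how a now-const range struct is wired into a
 * policy entry.
 */
static const struct netlink_range_validation my_burst_range = {
	.min = 1,
	.max = 1000,
};

static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
	[MY_ATTR_BURST] = NLA_POLICY_FULL_RANGE(NLA_U32, &my_burst_range),
};

Making the range pointers in struct nla_policy const is what allows definitions like my_burst_range above to live in rodata.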
Reviewed-by: Johannes Berg Reviewed-by: David Ahern Reviewed-by: Nikolay Aleksandrov Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231025162204.132528-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_netlink.c | 2 +- drivers/net/vxlan/vxlan_mdb.c | 2 +- include/net/netlink.h | 4 ++-- net/ipv6/ioam6_iptunnel.c | 2 +- net/sched/sch_fq.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_taprio.c | 2 +- net/wireless/nl80211.c | 2 +- tools/net/ynl/ynl-gen-c.py | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 27cbe148f0db..cfa74cf8bb1a 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -85,7 +85,7 @@ nla_put_failure: } /* Limit the max delay range to 300s */ -static struct netlink_range_validation delay_range = { +static const struct netlink_range_validation delay_range = { .max = 300000, }; diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 5e041622261a..3a21389658ce 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -311,7 +311,7 @@ vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = { [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol), }; -static struct netlink_range_validation vni_range = { +static const struct netlink_range_validation vni_range = { .max = VXLAN_N_VID - 1, }; diff --git a/include/net/netlink.h b/include/net/netlink.h index aba2b162a226..83bdf787aeee 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -360,8 +360,8 @@ struct nla_policy { const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; + const struct netlink_range_validation *range; + const struct netlink_range_validation_signed *range_signed; struct { s16 min, max; }; diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f6f5b83dd954..7563f8c6aa87 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -46,7 +46,7 @@ struct ioam6_lwt { struct ioam6_lwt_encap tuninfo; }; -static struct netlink_range_validation freq_range = { +static const struct netlink_range_validation freq_range = { .min = IOAM6_IPTUNNEL_FREQ_MIN, .max = IOAM6_IPTUNNEL_FREQ_MAX, }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index bf9d00518a60..0fd18c344ab5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -897,7 +897,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; } -static struct netlink_range_validation iq_range = { +static const struct netlink_range_validation iq_range = { .max = INT_MAX, }; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 68e6acd0f130..5b595773e59b 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -202,7 +202,7 @@ out: return NET_XMIT_CN; } -static struct netlink_range_validation fq_pie_q_range = { +static const struct netlink_range_validation fq_pie_q_range = { .min = 1, .max = 1 << 20, }; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 5598f8be18ae..28315166fe8e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -213,7 +213,7 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static struct netlink_range_validation lmax_range = { +static const struct netlink_range_validation 
lmax_range = { .min = QFQ_MIN_LMAX, .max = QFQ_MAX_LMAX, }; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1cb5e41c0ec7..2e1949de4171 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1015,7 +1015,7 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { TC_FP_PREEMPTIBLE), }; -static struct netlink_range_validation_signed taprio_cycle_time_range = { +static const struct netlink_range_validation_signed taprio_cycle_time_range = { .min = 0, .max = INT_MAX, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2650543dcebe..2f8353bf603c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -463,7 +463,7 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; -static struct netlink_range_validation nl80211_punct_bitmap_range = { +static const struct netlink_range_validation nl80211_punct_bitmap_range = { .min = 0, .max = 0xffff, }; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 0fee68863db4..31fd96f14fc0 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2038,7 +2038,7 @@ def print_kernel_policy_ranges(family, cw): first = False sign = '' if attr.type[0] == 'u' else '_signed' - cw.block_start(line=f'struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') + cw.block_start(line=f'static const struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') members = [] if 'min' in attr.checks: members.append(('min', attr.get_limit('min'))) -- cgit v1.2.3 From 8c73b26315aadb82218360d0a9a05e515f6e4118 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:53 +0100 Subject: net/tcp: Prepare tcp_md5sig_pool for TCP-AO TCP-AO, similarly to TCP-MD5, needs to allocate tfms on a slow-path, which is setsockopt() and use crypto ahash requests on fast paths, which are RX/TX softirqs. Also, it needs a temporary/scratch buffer for preparing the hash. Rework tcp_md5sig_pool in order to support other hashing algorithms than MD5. It will make it possible to share pre-allocated crypto_ahash descriptors and scratch area between all TCP hash users. Internally tcp_sigpool calls crypto_clone_ahash() API over pre-allocated crypto ahash tfm. Kudos to Herbert, who provided this new crypto API. I was a little concerned over GFP_ATOMIC allocations of ahash and crypto_request in RX/TX (see tcp_sigpool_start()), so I benchmarked both "backends" with different algorithms, using patched version of iperf3[2]. On my laptop with i7-7600U @ 2.80GHz: clone-tfm per-CPU-requests TCP-MD5 2.25 Gbits/sec 2.30 Gbits/sec TCP-AO(hmac(sha1)) 2.53 Gbits/sec 2.54 Gbits/sec TCP-AO(hmac(sha512)) 1.67 Gbits/sec 1.64 Gbits/sec TCP-AO(hmac(sha384)) 1.77 Gbits/sec 1.80 Gbits/sec TCP-AO(hmac(sha224)) 1.29 Gbits/sec 1.30 Gbits/sec TCP-AO(hmac(sha3-512)) 481 Mbits/sec 480 Mbits/sec TCP-AO(hmac(md5)) 2.07 Gbits/sec 2.12 Gbits/sec TCP-AO(hmac(rmd160)) 1.01 Gbits/sec 995 Mbits/sec TCP-AO(cmac(aes128)) [not supporetd yet] 2.11 Gbits/sec So, it seems that my concerns don't have strong grounds and per-CPU crypto_request allocation can be dropped/removed from tcp_sigpool once ciphers get crypto_clone_ahash() support. [1]: https://lore.kernel.org/all/ZDefxOq6Ax0JeTRH@gondor.apana.org.au/T/#u [2]: https://github.com/0x7f454c46/iperf/tree/tcp-md5-ao Signed-off-by: Dmitry Safonov Reviewed-by: Steen Hegelund Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 50 +++++-- net/ipv4/Kconfig | 4 + net/ipv4/Makefile | 1 + net/ipv4/tcp.c | 145 ++++--------------- net/ipv4/tcp_ipv4.c | 97 +++++++------ net/ipv4/tcp_minisocks.c | 21 ++- net/ipv4/tcp_sigpool.c | 358 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 60 ++++---- 8 files changed, 525 insertions(+), 211 deletions(-) create mode 100644 net/ipv4/tcp_sigpool.c (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 993b7fcd4e46..f6e2db5292b5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1737,12 +1737,39 @@ union tcp_md5sum_block { #endif }; -/* - pool: digest algorithm, hash description and scratch buffer */ -struct tcp_md5sig_pool { - struct ahash_request *md5_req; - void *scratch; +/* + * struct tcp_sigpool - per-CPU pool of ahash_requests + * @scratch: per-CPU temporary area, that can be used between + * tcp_sigpool_start() and tcp_sigpool_end() to perform + * crypto request + * @req: pre-allocated ahash request + */ +struct tcp_sigpool { + void *scratch; + struct ahash_request *req; }; +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size); +void tcp_sigpool_get(unsigned int id); +void tcp_sigpool_release(unsigned int id); +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len); + +/** + * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @c: returned tcp_sigpool for usage (uninitialized on failure) + * + * Returns 0 on success, error otherwise. + */ +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); +/** + * tcp_sigpool_end - enable bh and stop using tcp_sigpool + * @c: tcp_sigpool context that was returned by tcp_sigpool_start() + */ +void tcp_sigpool_end(struct tcp_sigpool *c); +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); @@ -1798,17 +1825,12 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, #define tcp_twsk_md5_key(twsk) NULL #endif -bool tcp_alloc_md5sig_pool(void); - -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -static inline void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); -} +int tcp_md5_alloc_sigpool(void); +void tcp_md5_release_sigpool(void); +void tcp_md5_add_sigpool(void); +extern int tcp_md5_sigpool_id; -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, - unsigned int header_len); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, +int tcp_md5_hash_key(struct tcp_sigpool *hp, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 2dfb12230f08..89e2ab023272 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -741,10 +741,14 @@ config DEFAULT_TCP_CONG default "bbr" if DEFAULT_BBR default "cubic" +config TCP_SIGPOOL + tristate + config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 + select TCP_SIGPOOL help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) 
use is to protect BGP sessions between core routers diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index b18ba8ef93ad..cd760793cfcb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_TCP_SIGPOOL) += tcp_sigpool.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 156264531124..dca9ca2f1081 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4305,141 +4305,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG -static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); -static DEFINE_MUTEX(tcp_md5sig_mutex); -static bool tcp_md5sig_pool_populated = false; +int tcp_md5_sigpool_id = -1; +EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); -static void __tcp_alloc_md5sig_pool(void) +int tcp_md5_alloc_sigpool(void) { - struct crypto_ahash *hash; - int cpu; - - hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - return; - - for_each_possible_cpu(cpu) { - void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; - struct ahash_request *req; - - if (!scratch) { - scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + - sizeof(struct tcphdr), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!scratch) - return; - per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; - } - if (per_cpu(tcp_md5sig_pool, cpu).md5_req) - continue; - - req = ahash_request_alloc(hash, GFP_KERNEL); - if (!req) - return; - - ahash_request_set_callback(req, 0, NULL, NULL); - - per_cpu(tcp_md5sig_pool, cpu).md5_req = req; - } - /* before setting tcp_md5sig_pool_populated, we must commit all writes - * to memory. See smp_rmb() in tcp_get_md5sig_pool() - */ - smp_wmb(); - /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool() - * and tcp_get_md5sig_pool(). - */ - WRITE_ONCE(tcp_md5sig_pool_populated, true); -} - -bool tcp_alloc_md5sig_pool(void) -{ - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) { - mutex_lock(&tcp_md5sig_mutex); - - if (!tcp_md5sig_pool_populated) - __tcp_alloc_md5sig_pool(); + size_t scratch_size; + int ret; - mutex_unlock(&tcp_md5sig_mutex); + scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr); + ret = tcp_sigpool_alloc_ahash("md5", scratch_size); + if (ret >= 0) { + /* As long as any md5 sigpool was allocated, the return + * id would stay the same. Re-write the id only for the case + * when previously all MD5 keys were deleted and this call + * allocates the first MD5 key, which may return a different + * sigpool id than was used previously. + */ + WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */ + return 0; } - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - return READ_ONCE(tcp_md5sig_pool_populated); + return ret; } -EXPORT_SYMBOL(tcp_alloc_md5sig_pool); - -/** - * tcp_get_md5sig_pool - get md5sig_pool for this user - * - * We use percpu structure, so if we succeed, we exit with preemption - * and BH disabled, to make sure another thread or softirq handling - * wont try to get same context. 
- */ -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) +void tcp_md5_release_sigpool(void) { - local_bh_disable(); - - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (READ_ONCE(tcp_md5sig_pool_populated)) { - /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ - smp_rmb(); - return this_cpu_ptr(&tcp_md5sig_pool); - } - local_bh_enable(); - return NULL; + tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - const struct sk_buff *skb, unsigned int header_len) +void tcp_md5_add_sigpool(void) { - struct scatterlist sg; - const struct tcphdr *tp = tcp_hdr(skb); - struct ahash_request *req = hp->md5_req; - unsigned int i; - const unsigned int head_data_len = skb_headlen(skb) > header_len ? - skb_headlen(skb) - header_len : 0; - const struct skb_shared_info *shi = skb_shinfo(skb); - struct sk_buff *frag_iter; - - sg_init_table(&sg, 1); - - sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); - ahash_request_set_crypt(req, &sg, NULL, head_data_len); - if (crypto_ahash_update(req)) - return 1; - - for (i = 0; i < shi->nr_frags; ++i) { - const skb_frag_t *f = &shi->frags[i]; - unsigned int offset = skb_frag_off(f); - struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); - - sg_set_page(&sg, page, skb_frag_size(f), - offset_in_page(offset)); - ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); - if (crypto_ahash_update(req)) - return 1; - } - - skb_walk_frags(skb, frag_iter) - if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) - return 1; - - return 0; + tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_sigpool *hp, + const struct tcp_md5sig_key *key) { u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; sg_init_one(&sg, key->key, keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + ahash_request_set_crypt(hp->req, &sg, NULL, keylen); - /* We use data_race() because tcp_md5_do_add() might change key->key under us */ - return data_race(crypto_ahash_update(hp->md5_req)); + /* We use data_race() because tcp_md5_do_add() might change + * key->key under us + */ + return data_race(crypto_ahash_update(hp->req)); } EXPORT_SYMBOL(tcp_md5_hash_key); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7583d4e34c8c..7d81e90b6f5c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1221,10 +1221,6 @@ static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; - if (!tcp_alloc_md5sig_pool()) { - sock_kfree_s(sk, key, sizeof(*key)); - return -ENOMEM; - } memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; @@ -1246,8 +1242,13 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, GFP_KERNEL)) + if (tcp_md5_alloc_sigpool()) + return -ENOMEM; + + if (tcp_md5sig_info_add(sk, GFP_KERNEL)) { + tcp_md5_release_sigpool(); return -ENOMEM; + } if (!static_branch_inc(&tcp_md5_needed.key)) { struct tcp_md5sig_info *md5sig; @@ -1255,6 +1256,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, md5sig = rcu_dereference_protected(tp->md5sig_info, 
lockdep_sock_is_held(sk)); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1271,8 +1273,12 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) + tcp_md5_add_sigpool(); + + if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) { + tcp_md5_release_sigpool(); return -ENOMEM; + } if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { struct tcp_md5sig_info *md5sig; @@ -1281,6 +1287,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1380,7 +1387,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp, __be32 daddr, __be32 saddr, const struct tcphdr *th, int nbytes) { @@ -1400,38 +1407,35 @@ static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -1440,9 +1444,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sigpool hp; __be32 saddr, daddr; if (sk) { /* valid for establish/request sockets */ @@ -1454,30 +1457,28 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, daddr = iph->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, 
skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -2296,6 +2297,18 @@ static int tcp_v4_init_sock(struct sock *sk) return 0; } +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5sig_info_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_info *md5sig; + + md5sig = container_of(head, struct tcp_md5sig_info, rcu); + kfree(md5sig); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2320,10 +2333,12 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { + struct tcp_md5sig_info *md5sig; + + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); tcp_clear_md5_list(sk); - kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); - tp->md5sig_info = NULL; - static_branch_slow_dec_deferred(&tcp_md5_needed); + call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu); + rcu_assign_pointer(tp->md5sig_info, NULL); } #endif diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ace806c5bd0c..3dcb3fc36e64 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -261,10 +261,9 @@ static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (!tcptw->tw_md5_key) return; - if (!tcp_alloc_md5sig_pool()) - goto out_free; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; + tcp_md5_add_sigpool(); } return; out_free: @@ -349,16 +348,26 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } EXPORT_SYMBOL(tcp_time_wait); +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5_twsk_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_key *key; + + key = container_of(head, struct tcp_md5sig_key, rcu); + kfree(key); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - kfree_rcu(twsk->tw_md5_key, rcu); - static_branch_slow_dec_deferred(&tcp_md5_needed); - } + if (twsk->tw_md5_key) + call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif } diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c new file mode 100644 index 000000000000..65a8eaae2fec --- /dev/null +++ b/net/ipv4/tcp_sigpool.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include + +static size_t __scratch_size; +static DEFINE_PER_CPU(void __rcu *, sigpool_scratch); + +struct sigpool_entry { + struct crypto_ahash *hash; + const char *alg; + struct kref kref; + uint16_t needs_key:1, + reserved:15; +}; + +#define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry)) +static struct sigpool_entry cpool[CPOOL_SIZE]; +static unsigned int cpool_populated; +static 
DEFINE_MUTEX(cpool_mutex); + +/* Slow-path */ +struct scratches_to_free { + struct rcu_head rcu; + unsigned int cnt; + void *scratches[]; +}; + +static void free_old_scratches(struct rcu_head *head) +{ + struct scratches_to_free *stf; + + stf = container_of(head, struct scratches_to_free, rcu); + while (stf->cnt--) + kfree(stf->scratches[stf->cnt]); + kfree(stf); +} + +/** + * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path + * @size: request size for the scratch/temp buffer + */ +static int sigpool_reserve_scratch(size_t size) +{ + struct scratches_to_free *stf; + size_t stf_sz = struct_size(stf, scratches, num_possible_cpus()); + int cpu, err = 0; + + lockdep_assert_held(&cpool_mutex); + if (__scratch_size >= size) + return 0; + + stf = kmalloc(stf_sz, GFP_KERNEL); + if (!stf) + return -ENOMEM; + stf->cnt = 0; + + size = max(size, __scratch_size); + cpus_read_lock(); + for_each_possible_cpu(cpu) { + void *scratch, *old_scratch; + + scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + if (!scratch) { + err = -ENOMEM; + break; + } + + old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + scratch, lockdep_is_held(&cpool_mutex)); + if (!cpu_online(cpu) || !old_scratch) { + kfree(old_scratch); + continue; + } + stf->scratches[stf->cnt++] = old_scratch; + } + cpus_read_unlock(); + if (!err) + __scratch_size = size; + + call_rcu(&stf->rcu, free_old_scratches); + return err; +} + +static void sigpool_scratch_free(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + NULL, lockdep_is_held(&cpool_mutex))); + __scratch_size = 0; +} + +static int __cpool_try_clone(struct crypto_ahash *hash) +{ + struct crypto_ahash *tmp; + + tmp = crypto_clone_ahash(hash); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + crypto_free_ahash(tmp); + return 0; +} + +static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg) +{ + struct crypto_ahash *cpu0_hash; + int ret; + + e->alg = kstrdup(alg, GFP_KERNEL); + if (!e->alg) + return -ENOMEM; + + cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(cpu0_hash)) { + ret = PTR_ERR(cpu0_hash); + goto out_free_alg; + } + + e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY; + + ret = __cpool_try_clone(cpu0_hash); + if (ret) + goto out_free_cpu0_hash; + e->hash = cpu0_hash; + kref_init(&e->kref); + return 0; + +out_free_cpu0_hash: + crypto_free_ahash(cpu0_hash); +out_free_alg: + kfree(e->alg); + e->alg = NULL; + return ret; +} + +/** + * tcp_sigpool_alloc_ahash - allocates pool for ahash requests + * @alg: name of async hash algorithm + * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size + */ +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) +{ + int i, ret; + + /* slow-path */ + mutex_lock(&cpool_mutex); + ret = sigpool_reserve_scratch(scratch_size); + if (ret) + goto out; + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + continue; + if (strcmp(cpool[i].alg, alg)) + continue; + + if (kref_read(&cpool[i].kref) > 0) + kref_get(&cpool[i].kref); + else + kref_init(&cpool[i].kref); + ret = i; + goto out; + } + + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + break; + } + if (i >= CPOOL_SIZE) { + ret = -ENOSPC; + goto out; + } + + ret = __cpool_alloc_ahash(&cpool[i], alg); + if (!ret) { + ret = i; + if (i == cpool_populated) + cpool_populated++; + } +out: + mutex_unlock(&cpool_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash); + +static void 
__cpool_free_entry(struct sigpool_entry *e) +{ + crypto_free_ahash(e->hash); + kfree(e->alg); + memset(e, 0, sizeof(*e)); +} + +static void cpool_cleanup_work_cb(struct work_struct *work) +{ + bool free_scratch = true; + unsigned int i; + + mutex_lock(&cpool_mutex); + for (i = 0; i < cpool_populated; i++) { + if (kref_read(&cpool[i].kref) > 0) { + free_scratch = false; + continue; + } + if (!cpool[i].alg) + continue; + __cpool_free_entry(&cpool[i]); + } + if (free_scratch) + sigpool_scratch_free(); + mutex_unlock(&cpool_mutex); +} + +static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb); +static void cpool_schedule_cleanup(struct kref *kref) +{ + schedule_work(&cpool_cleanup_work); +} + +/** + * tcp_sigpool_release - decreases number of users for a pool. If it was + * the last user of the pool, releases any memory that was consumed. + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_release(unsigned int id) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return; + + /* slow-path */ + kref_put(&cpool[id].kref, cpool_schedule_cleanup); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_release); + +/** + * tcp_sigpool_get - increases number of users (refcounter) for a pool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_get(unsigned int id) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return; + kref_get(&cpool[id].kref); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_get); + +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH) +{ + struct crypto_ahash *hash; + + rcu_read_lock_bh(); + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) { + rcu_read_unlock_bh(); + return -EINVAL; + } + + hash = crypto_clone_ahash(cpool[id].hash); + if (IS_ERR(hash)) { + rcu_read_unlock_bh(); + return PTR_ERR(hash); + } + + c->req = ahash_request_alloc(hash, GFP_ATOMIC); + if (!c->req) { + crypto_free_ahash(hash); + rcu_read_unlock_bh(); + return -ENOMEM; + } + ahash_request_set_callback(c->req, 0, NULL, NULL); + + /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is + * valid (allocated) until tcp_sigpool_end(). + */ + c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch)); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_start); + +void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH) +{ + struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req); + + rcu_read_unlock_bh(); + ahash_request_free(c->req); + crypto_free_ahash(hash); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_end); + +/** + * tcp_sigpool_algo - return algorithm of tcp_sigpool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @buf: buffer to return name of algorithm + * @buf_len: size of @buf + */ +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return -EINVAL; + + return strscpy(buf, cpool[id].alg, buf_len); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_algo); + +/** + * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool + * @hp: tcp_sigpool pointer + * @skb: buffer to add sign for + * @header_len: TCP header length for this segment + */ +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len) +{ + const unsigned int head_data_len = skb_headlen(skb) > header_len ? 
+ skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + const struct tcphdr *tp = tcp_hdr(skb); + struct ahash_request *req = hp->req; + struct sk_buff *frag_iter; + struct scatterlist sg; + unsigned int i; + + sg_init_table(&sg, 1); + + sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len); + ahash_request_set_crypt(req, &sg, NULL, head_data_len); + if (crypto_ahash_update(req)) + return 1; + + for (i = 0; i < shi->nr_frags; ++i) { + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); + struct page *page; + + page = skb_frag_page(f) + (offset >> PAGE_SHIFT); + sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset)); + ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); + if (crypto_ahash_update(req)) + return 1; + } + + skb_walk_frags(skb, frag_iter) + if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0)) + return 1; + + return 0; +} +EXPORT_SYMBOL(tcp_sigpool_hash_skb_data); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Per-CPU pool of crypto requests"); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc27988512a6..ee53dad20a59 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -671,7 +671,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) @@ -692,39 +692,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -734,10 +731,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, const struct sock *sk, const struct sk_buff *skb) { - const struct in6_addr *saddr, *daddr; - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + const struct in6_addr *saddr, *daddr; + struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; @@ -748,30 +744,28 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, daddr = &ip6h->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto 
clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } -- cgit v1.2.3 From c845f5f3590ef4669fe5464f8a42be6442cd174b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:54 +0100 Subject: net/tcp: Add TCP-AO config and structures Introduce new kernel config option and common structures as well as helpers to be used by TCP-AO code. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 +++-- include/net/tcp.h | 8 ++--- include/net/tcp_ao.h | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++ 5 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 include/net/tcp_ao.h (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6df715b6e51d..64e7b560fa79 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -447,13 +447,18 @@ struct tcp_sock { bool syn_smc; /* SYN includes SMC */ #endif -#ifdef CONFIG_TCP_MD5SIG -/* TCP AF-Specific parts; only used by MD5 Signature support so far */ +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; +#ifdef CONFIG_TCP_MD5SIG /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif +#endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; diff --git a/include/net/tcp.h b/include/net/tcp.h index f6e2db5292b5..3153712faa3d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1688,12 +1689,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->retransmit_skb_hint = NULL; } -union tcp_md5_addr { - struct in_addr a4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr a6; -#endif -}; +#define tcp_md5_addr tcp_ao_addr /* - key database */ struct tcp_md5sig_key { diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h new file mode 100644 index 000000000000..af76e1c47bea --- /dev/null +++ b/include/net/tcp_ao.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_AO_H +#define _TCP_AO_H + +#define TCP_AO_KEY_ALIGN 1 +#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN) + +union tcp_ao_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + +struct tcp_ao_hdr { + u8 
kind; + u8 length; + u8 keyid; + u8 rnext_keyid; +}; + +struct tcp_ao_key { + struct hlist_node node; + union tcp_ao_addr addr; + u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align; + unsigned int tcp_sigpool_id; + unsigned int digest_size; + u8 prefixlen; + u8 family; + u8 keylen; + u8 keyflags; + u8 sndid; + u8 rcvid; + u8 maclen; + struct rcu_head rcu; + u8 traffic_keys[]; +}; + +static inline u8 *rcv_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys; +} + +static inline u8 *snd_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys + key->digest_size; +} + +static inline int tcp_ao_maclen(const struct tcp_ao_key *key) +{ + return key->maclen; +} + +static inline int tcp_ao_len(const struct tcp_ao_key *key) +{ + return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); +} + +static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) +{ + return key->digest_size; +} + +static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key) +{ + return sizeof(struct tcp_ao_key) + (key->digest_size << 1); +} + +struct tcp_ao_info { + /* List of tcp_ao_key's */ + struct hlist_head head; + /* current_key and rnext_key aren't maintained on listen sockets. + * Their purpose is to cache keys on established connections, + * saving needless lookups. Never dereference any of them from + * listen sockets. + * ::current_key may change in RX to the key that was requested by + * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid + * load/store tearing. + * Do the same for ::rnext_key, if you don't hold socket lock + * (it's changed only by userspace request in setsockopt()). + */ + struct tcp_ao_key *current_key; + struct tcp_ao_key *rnext_key; + u32 flags; + __be32 lisn; + __be32 risn; + struct rcu_head rcu; +}; + +#endif /* _TCP_AO_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8aa3916e14f6..74a1267baac5 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -361,6 +361,8 @@ struct tcp_diag_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; }; +#define TCP_AO_MAXKEYLEN 80 + /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 89e2ab023272..8e94ed7c56a0 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -744,6 +744,19 @@ config DEFAULT_TCP_CONG config TCP_SIGPOOL tristate +config TCP_AO + bool "TCP: Authentication Option (RFC5925)" + select CRYPTO + select TCP_SIGPOOL + depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) + help + TCP-AO specifies the use of stronger Message Authentication Codes (MACs), + protects against replays for long-lived TCP connections, and + provides more details on the association of security with TCP + connections than TCP MD5 (See RFC5925) + + If unsure, say N. + config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO -- cgit v1.2.3 From 4954f17ddefc51d218625dcdfaf422a253dad3fa Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:55 +0100 Subject: net/tcp: Introduce TCP_AO setsockopt()s Add 3 setsockopt()s: 1. TCP_AO_ADD_KEY to add a new Master Key Tuple (MKT) on a socket 2. TCP_AO_DEL_KEY to delete present MKT from a socket 3. TCP_AO_INFO to change flags, Current_key/RNext_key on a TCP-AO sk Userspace has to introduce keys on every socket it wants to use TCP-AO option on, similarly to TCP_MD5SIG/TCP_MD5SIG_EXT. 
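As a rough illustration (not part of this patch), a userspace program could add a single MKT on a not-yet-connected socket along these lines; the peer address, IDs and key below are placeholders:

/* Hypothetical userspace sketch: add one TCP-AO key for peer 192.0.2.1.
 * Assumes the uapi <linux/tcp.h> from this series (struct tcp_ao_add,
 * TCP_AO_ADD_KEY) is available; must be done while the socket is still
 * in CLOSE or LISTEN state.
 */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/tcp.h>

static int add_ao_key(int sk)
{
	struct tcp_ao_add ao = {};
	struct sockaddr_in *peer = (struct sockaddr_in *)&ao.addr;

	peer->sin_family = AF_INET;
	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);

	strcpy(ao.alg_name, "hmac(sha1)");	/* any supported ahash */
	ao.prefix = 32;				/* match this exact peer */
	ao.sndid  = 100;			/* SendID for outgoing segments */
	ao.rcvid  = 100;			/* RecvID expected from the peer */
	ao.keylen = 4;
	memcpy(ao.key, "test", ao.keylen);

	return setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao));
}

Both peers need a matching key (same secret, with sndid/rcvid mirrored) before the connection is established, otherwise segments will not validate.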
RFC5925 prohibits definition of MKTs that would match the same peer, so do sanity checks on the data provided by userspace. Be as conservative as possible, including refusal of defining MKT on an established connection with no AO, removing the key in-use and etc. (1) and (2) are to be used by userspace key manager to add/remove keys. (3) main purpose is to set RNext_key, which (as prescribed by RFC5925) is the KeyID that will be requested in TCP-AO header from the peer to sign their segments with. At this moment the life of ao_info ends in tcp_v4_destroy_sock(). Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/sockptr.h | 23 ++ include/net/tcp.h | 3 + include/net/tcp_ao.h | 17 +- include/uapi/linux/tcp.h | 46 +++ net/ipv4/Makefile | 1 + net/ipv4/tcp.c | 17 + net/ipv4/tcp_ao.c | 794 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 10 +- net/ipv6/Makefile | 1 + net/ipv6/tcp_ao.c | 19 ++ net/ipv6/tcp_ipv6.c | 39 ++- 11 files changed, 952 insertions(+), 18 deletions(-) create mode 100644 net/ipv4/tcp_ao.c create mode 100644 net/ipv6/tcp_ao.c (limited to 'net') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index bae5e2369b4f..307961b41541 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -55,6 +55,29 @@ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) return copy_from_sockptr_offset(dst, src, 0, size); } +static inline int copy_struct_from_sockptr(void *dst, size_t ksize, + sockptr_t src, size_t usize) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + if (!sockptr_is_kernel(src)) + return copy_struct_from_user(dst, ksize, src.user, size); + + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + char *p = src.kernel; + + while (rest--) { + if (*p++) + return -E2BIG; + } + } + memcpy(dst, src.kernel, size); + return 0; +} + static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, const void *src, size_t size) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 3153712faa3d..ff204471d451 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2175,6 +2175,9 @@ struct tcp_sock_af_ops { sockptr_t optval, int optlen); #endif +#ifdef CONFIG_TCP_AO + int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); +#endif }; struct tcp_request_sock_ops { diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index af76e1c47bea..a81e40fd255a 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -81,10 +81,25 @@ struct tcp_ao_info { */ struct tcp_ao_key *current_key; struct tcp_ao_key *rnext_key; - u32 flags; + u32 ao_required :1, + __unused :31; __be32 lisn; __be32 risn; struct rcu_head rcu; }; +#ifdef CONFIG_TCP_AO +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen); +void tcp_ao_destroy_sock(struct sock *sk); +/* ipv4 specific functions */ +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +/* ipv6 specific functions */ +int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +#else +static inline void tcp_ao_destroy_sock(struct sock *sk) +{ +} +#endif + #endif /* _TCP_AO_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 74a1267baac5..fa49f03e62fe 100644 --- a/include/uapi/linux/tcp.h +++ 
b/include/uapi/linux/tcp.h @@ -129,6 +129,9 @@ enum { #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ +#define TCP_AO_ADD_KEY 38 /* Add/Set MKT */ +#define TCP_AO_DEL_KEY 39 /* Delete MKT */ +#define TCP_AO_INFO 40 /* Modify TCP-AO per-socket options */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 @@ -363,6 +366,49 @@ struct tcp_diag_md5sig { #define TCP_AO_MAXKEYLEN 80 +#define TCP_AO_KEYF_IFINDEX (1 << 0) /* L3 ifindex for VRF */ + +struct tcp_ao_add { /* setsockopt(TCP_AO_ADD_KEY) */ + struct __kernel_sockaddr_storage addr; /* peer's address for the key */ + char alg_name[64]; /* crypto hash algorithm to use */ + __s32 ifindex; /* L3 dev index for VRF */ + __u32 set_current :1, /* set key as Current_key at once */ + set_rnext :1, /* request it from peer with RNext_key */ + reserved :30; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ + __u8 prefix; /* peer's address prefix */ + __u8 sndid; /* SendID for outgoing segments */ + __u8 rcvid; /* RecvID to match for incoming seg */ + __u8 maclen; /* length of authentication code (hash) */ + __u8 keyflags; /* see TCP_AO_KEYF_ */ + __u8 keylen; /* length of ::key */ + __u8 key[TCP_AO_MAXKEYLEN]; +} __attribute__((aligned(8))); + +struct tcp_ao_del { /* setsockopt(TCP_AO_DEL_KEY) */ + struct __kernel_sockaddr_storage addr; /* peer's address for the key */ + __s32 ifindex; /* L3 dev index for VRF */ + __u32 set_current :1, /* corresponding ::current_key */ + set_rnext :1, /* corresponding ::rnext */ + reserved :30; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ + __u8 prefix; /* peer's address prefix */ + __u8 sndid; /* SendID for outgoing segments */ + __u8 rcvid; /* RecvID to match for incoming seg */ + __u8 current_key; /* KeyID to set as Current_key */ + __u8 rnext; /* KeyID to set as Rnext_key */ + __u8 keyflags; /* see TCP_AO_KEYF_ */ +} __attribute__((aligned(8))); + +struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */ + __u32 set_current :1, /* corresponding ::current_key */ + set_rnext :1, /* corresponding ::rnext */ + ao_required :1, /* don't accept non-AO connects */ + reserved :29; /* must be 0 */ + __u8 current_key; /* KeyID to set as Current_key */ + __u8 rnext; /* KeyID to set as Rnext_key */ +} __attribute__((aligned(8))); + /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) 
*/ #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index cd760793cfcb..e144a02a6a61 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o ifeq ($(CONFIG_BPF_JIT),y) obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dca9ca2f1081..b6faee8a1e67 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3593,6 +3593,23 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, __tcp_sock_set_quickack(sk, val); break; +#ifdef CONFIG_TCP_AO + case TCP_AO_ADD_KEY: + case TCP_AO_DEL_KEY: + case TCP_AO_INFO: { + /* If this is the first TCP-AO setsockopt() on the socket, + * sk_state has to be LISTEN or CLOSE + */ + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) || + rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk))) + err = tp->af_specific->ao_parse(sk, optname, optval, + optlen); + else + err = -EISCONN; + break; + } +#endif #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c new file mode 100644 index 000000000000..3c2d005a37ce --- /dev/null +++ b/net/ipv4/tcp_ao.c @@ -0,0 +1,794 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. + * + * Authors: Dmitry Safonov + * Francesco Ruggeri + * Salam Noureddine + */ +#define pr_fmt(fmt) "TCP: " fmt + +#include +#include +#include + +#include +#include + +/* Optimized version of tcp_ao_do_lookup(): only for sockets for which + * it's known that the keys in ao_info are matching peer's + * family/address/VRF/etc. + */ +static struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if ((sndid >= 0 && key->sndid != sndid) || + (rcvid >= 0 && key->rcvid != rcvid)) + continue; + return key; + } + + return NULL; +} + +static int ipv4_prefix_cmp(const struct in_addr *addr1, + const struct in_addr *addr2, + unsigned int prefixlen) +{ + __be32 mask = inet_make_mask(prefixlen); + __be32 a1 = addr1->s_addr & mask; + __be32 a2 = addr2->s_addr & mask; + + if (a1 == a2) + return 0; + return memcmp(&a1, &a2, sizeof(a1)); +} + +static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ + if (sndid >= 0 && key->sndid != sndid) + return (key->sndid > sndid) ? 1 : -1; + if (rcvid >= 0 && key->rcvid != rcvid) + return (key->rcvid > rcvid) ? 1 : -1; + + if (family == AF_UNSPEC) + return 0; + if (key->family != family) + return (key->family > family) ? 
1 : -1; + + if (family == AF_INET) { + if (ntohl(key->addr.a4.s_addr) == INADDR_ANY) + return 0; + if (ntohl(addr->a4.s_addr) == INADDR_ANY) + return 0; + return ipv4_prefix_cmp(&key->addr.a4, &addr->a4, prefixlen); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_any(&key->addr.a6) || ipv6_addr_any(&addr->a6)) + return 0; + if (ipv6_prefix_equal(&key->addr.a6, &addr->a6, prefixlen)) + return 0; + return memcmp(&key->addr.a6, &addr->a6, sizeof(addr->a6)); +#endif + } + return -1; +} + +static int tcp_ao_key_cmp(const struct tcp_ao_key *key, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&addr->a6)) { + __be32 addr4 = addr->a6.s6_addr32[3]; + + return __tcp_ao_key_cmp(key, (union tcp_ao_addr *)&addr4, + prefixlen, AF_INET, sndid, rcvid); + } +#endif + return __tcp_ao_key_cmp(key, addr, prefixlen, family, sndid, rcvid); +} + +static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, + const union tcp_ao_addr *addr, int family, u8 prefix, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + + ao = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return NULL; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + u8 prefixlen = min(prefix, key->prefixlen); + + if (!tcp_ao_key_cmp(key, addr, prefixlen, family, sndid, rcvid)) + return key; + } + return NULL; +} + +static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) +{ + struct tcp_ao_info *ao; + + ao = kzalloc(sizeof(*ao), flags); + if (!ao) + return NULL; + INIT_HLIST_HEAD(&ao->head); + + return ao; +} + +static void tcp_ao_link_mkt(struct tcp_ao_info *ao, struct tcp_ao_key *mkt) +{ + hlist_add_head_rcu(&mkt->node, &ao->head); +} + +static void tcp_ao_key_free_rcu(struct rcu_head *head) +{ + struct tcp_ao_key *key = container_of(head, struct tcp_ao_key, rcu); + + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +} + +void tcp_ao_destroy_sock(struct sock *sk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + struct hlist_node *n; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); + tcp_sk(sk)->ao_info = NULL; + + if (!ao) + return; + + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del_rcu(&key->node); + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + kfree_rcu(ao, rcu); +} + +static bool tcp_ao_can_set_current_rnext(struct sock *sk) +{ + /* There aren't current/rnext keys on TCP_LISTEN sockets */ + if (sk->sk_state == TCP_LISTEN) + return false; + return true; +} + +static int tcp_ao_verify_ipv4(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **addr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd->addr; + struct inet_sock *inet = inet_sk(sk); + + if (sin->sin_family != AF_INET) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin->sin_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0) { + __be32 mask; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY) + return -EINVAL; + if (cmd->prefix > 32) + return -EINVAL; + + mask = inet_make_mask(cmd->prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (ntohl(inet->inet_daddr) != INADDR_ANY && + (inet->inet_daddr & mask) != sin->sin_addr.s_addr) + return -EINVAL; + } else { + if (ntohl(sin->sin_addr.s_addr) != 
INADDR_ANY) + return -EINVAL; + } + + *addr = (union tcp_ao_addr *)&sin->sin_addr; + return 0; +} + +static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) +{ + unsigned int syn_tcp_option_space; + bool is_kdf_aes_128_cmac = false; + struct crypto_ahash *tfm; + struct tcp_sigpool hp; + void *tmp_key = NULL; + int err; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", cmd->alg_name)) { + strscpy(cmd->alg_name, "cmac(aes)", sizeof(cmd->alg_name)); + is_kdf_aes_128_cmac = (cmd->keylen != 16); + tmp_key = kmalloc(cmd->keylen, GFP_KERNEL); + if (!tmp_key) + return -ENOMEM; + } + + key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ + + /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss + * - tstamp - wscale - sackperm), + * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. + * + * In order to allow D-SACK with TCP-AO, the header size should be: + * (MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED + * - TCPOLEN_SACK_BASE_ALIGNED + * - 2 * TCPOLEN_SACK_PERBLOCK) = 8 (maclen = 4), + * see tcp_established_options(). + * + * RFC5925, 2.2: + * Typical MACs are 96-128 bits (12-16 bytes), but any length + * that fits in the header of the segment being authenticated + * is allowed. + * + * RFC5925, 7.6: + * TCP-AO continues to consume 16 bytes in non-SYN segments, + * leaving a total of 24 bytes for other options, of which + * the timestamp consumes 10. This leaves 14 bytes, of which 10 + * are used for a single SACK block. When two SACK blocks are used, + * such as to handle D-SACK, a smaller TCP-AO MAC would be required + * to make room for the additional SACK block (i.e., to leave 18 + * bytes for the D-SACK variant of the SACK option) [RFC2883]. + * Note that D-SACK is not supportable in TCP MD5 in the presence + * of timestamps, because TCP MD5’s MAC length is fixed and too + * large to leave sufficient option space. 
+ */ + syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; + syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; + syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED; + if (tcp_ao_len(key) > syn_tcp_option_space) { + err = -EMSGSIZE; + goto err_kfree; + } + + key->keylen = cmd->keylen; + memcpy(key->key, cmd->key, cmd->keylen); + + err = tcp_sigpool_start(key->tcp_sigpool_id, &hp); + if (err) + goto err_kfree; + + tfm = crypto_ahash_reqtfm(hp.req); + if (is_kdf_aes_128_cmac) { + void *scratch = hp.scratch; + struct scatterlist sg; + + memcpy(tmp_key, cmd->key, cmd->keylen); + sg_init_one(&sg, tmp_key, cmd->keylen); + + /* Using zero-key of 16 bytes as described in RFC5926 */ + memset(scratch, 0, 16); + err = crypto_ahash_setkey(tfm, scratch, 16); + if (err) + goto err_pool_end; + + err = crypto_ahash_init(hp.req); + if (err) + goto err_pool_end; + + ahash_request_set_crypt(hp.req, &sg, key->key, cmd->keylen); + err = crypto_ahash_update(hp.req); + if (err) + goto err_pool_end; + + err |= crypto_ahash_final(hp.req); + if (err) + goto err_pool_end; + key->keylen = 16; + } + + err = crypto_ahash_setkey(tfm, key->key, key->keylen); + if (err) + goto err_pool_end; + + tcp_sigpool_end(&hp); + kfree_sensitive(tmp_key); + + if (tcp_ao_maclen(key) > key->digest_size) + return -EINVAL; + + return 0; + +err_pool_end: + tcp_sigpool_end(&hp); +err_kfree: + kfree_sensitive(tmp_key); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd->addr; + struct in6_addr *addr = &sin6->sin6_addr; + u8 prefix = cmd->prefix; + + if (sin6->sin6_family != AF_INET6) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin6->sin6_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0 && ipv6_addr_v4mapped(addr)) { + __be32 addr4 = addr->s6_addr32[3]; + __be32 mask; + + if (prefix > 32 || ntohl(addr4) == INADDR_ANY) + return -EINVAL; + + mask = inet_make_mask(prefix); + if (addr4 & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + __be32 daddr4 = sk->sk_v6_daddr.s6_addr32[3]; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return -EINVAL; + if ((daddr4 & mask) != addr4) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)&addr->s6_addr32[3]; + *family = AF_INET; + return 0; + } else if (cmd->prefix != 0) { + struct in6_addr pfx; + + if (ipv6_addr_any(addr) || prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr, prefix); + if (ipv6_addr_cmp(&pfx, addr)) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_prefix_equal(&sk->sk_v6_daddr, addr, prefix)) + + return -EINVAL; + } else { + if (!ipv6_addr_any(addr)) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)addr; + return 0; +} +#else +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + return -EOPNOTSUPP; +} +#endif + +static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) { + return rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + } + return ERR_PTR(-ESOCKTNOSUPPORT); +} + +#define TCP_AO_KEYF_ALL (0) + +static struct tcp_ao_key 
*tcp_ao_key_alloc(struct sock *sk, + struct tcp_ao_add *cmd) +{ + const char *algo = cmd->alg_name; + unsigned int digest_size; + struct crypto_ahash *tfm; + struct tcp_ao_key *key; + struct tcp_sigpool hp; + int err, pool_id; + size_t size; + + /* Force null-termination of alg_name */ + cmd->alg_name[ARRAY_SIZE(cmd->alg_name) - 1] = '\0'; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", algo)) + algo = "cmac(aes)"; + + /* Full TCP header (th->doff << 2) should fit into scratch area, + * see tcp_ao_hash_header(). + */ + pool_id = tcp_sigpool_alloc_ahash(algo, 60); + if (pool_id < 0) + return ERR_PTR(pool_id); + + err = tcp_sigpool_start(pool_id, &hp); + if (err) + goto err_free_pool; + + tfm = crypto_ahash_reqtfm(hp.req); + if (crypto_ahash_alignmask(tfm) > TCP_AO_KEY_ALIGN) { + err = -EOPNOTSUPP; + goto err_pool_end; + } + digest_size = crypto_ahash_digestsize(tfm); + tcp_sigpool_end(&hp); + + size = sizeof(struct tcp_ao_key) + (digest_size << 1); + key = sock_kmalloc(sk, size, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto err_free_pool; + } + + key->tcp_sigpool_id = pool_id; + key->digest_size = digest_size; + return key; + +err_pool_end: + tcp_sigpool_end(&hp); +err_free_pool: + tcp_sigpool_release(pool_id); + return ERR_PTR(err); +} + +static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + struct tcp_ao_add cmd; + bool first = false; + int ret; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + ret = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (ret) + return ret; + + if (cmd.keylen > TCP_AO_MAXKEYLEN) + return -EINVAL; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (family == AF_INET) + ret = tcp_ao_verify_ipv4(sk, &cmd, &addr); + else + ret = tcp_ao_verify_ipv6(sk, &cmd, &addr, &family); + if (ret) + return ret; + + if (cmd.keyflags & ~TCP_AO_KEYF_ALL) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + + if (!ao_info) { + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } else { + /* Check that neither RecvID nor SendID match any + * existing key for the peer, RFC5925 3.1: + * > The IDs of MKTs MUST NOT overlap where their + * > TCP connection identifiers overlap. + */ + if (__tcp_ao_do_lookup(sk, addr, family, + cmd.prefix, -1, cmd.rcvid)) + return -EEXIST; + if (__tcp_ao_do_lookup(sk, addr, family, + cmd.prefix, cmd.sndid, -1)) + return -EEXIST; + } + + key = tcp_ao_key_alloc(sk, &cmd); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto err_free_ao; + } + + INIT_HLIST_NODE(&key->node); + memcpy(&key->addr, addr, (family == AF_INET) ? 
sizeof(struct in_addr) : + sizeof(struct in6_addr)); + key->prefixlen = cmd.prefix; + key->family = family; + key->keyflags = cmd.keyflags; + key->sndid = cmd.sndid; + key->rcvid = cmd.rcvid; + + ret = tcp_ao_parse_crypto(&cmd, key); + if (ret < 0) + goto err_free_sock; + + tcp_ao_link_mkt(ao_info, key); + if (first) { + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + + if (cmd.set_current) + WRITE_ONCE(ao_info->current_key, key); + if (cmd.set_rnext) + WRITE_ONCE(ao_info->rnext_key, key); + return 0; + +err_free_sock: + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +err_free_ao: + if (first) + kfree(ao_info); + return ret; +} + +static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, + struct tcp_ao_key *key, + struct tcp_ao_key *new_current, + struct tcp_ao_key *new_rnext) +{ + int err; + + hlist_del_rcu(&key->node); + + /* At this moment another CPU could have looked this key up + * while it was unlinked from the list. Wait for RCU grace period, + * after which the key is off-list and can't be looked up again; + * the rx path [just before RCU came] might have used it and set it + * as current_key (very unlikely). + */ + synchronize_rcu(); + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + + if (unlikely(READ_ONCE(ao_info->current_key) == key || + READ_ONCE(ao_info->rnext_key) == key)) { + err = -EBUSY; + goto add_key; + } + + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + + return 0; +add_key: + hlist_add_head_rcu(&key->node, &ao_info->head); + return err; +} + +static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL; + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_del cmd; + int addr_len; + __u8 prefix; + u16 port; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + /* For sockets in TCP_CLOSED it's possible set keys that aren't + * matching the future peer (address/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any. 
+ */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) + return -ENOENT; + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) + return -ENOENT; + } + + if (family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr; + + addr = (union tcp_ao_addr *)&sin->sin_addr; + addr_len = sizeof(struct in_addr); + port = ntohs(sin->sin_port); + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.addr; + struct in6_addr *addr6 = &sin6->sin6_addr; + + if (ipv6_addr_v4mapped(addr6)) { + addr = (union tcp_ao_addr *)&addr6->s6_addr32[3]; + addr_len = sizeof(struct in_addr); + family = AF_INET; + } else { + addr = (union tcp_ao_addr *)addr6; + addr_len = sizeof(struct in6_addr); + } + port = ntohs(sin6->sin6_port); + } + prefix = cmd.prefix; + + /* Currently matching is not performed on port (or port ranges) */ + if (port != 0) + return -EINVAL; + + /* We could choose random present key here for current/rnext + * but that's less predictable. Let's be strict and don't + * allow removing a key that's in use. RFC5925 doesn't + * specify how-to coordinate key removal, but says: + * "It is presumed that an MKT affecting a particular + * connection cannot be destroyed during an active connection" + */ + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (cmd.sndid != key->sndid || + cmd.rcvid != key->rcvid) + continue; + + if (family != key->family || + prefix != key->prefixlen || + memcmp(addr, &key->addr, addr_len)) + continue; + + if (key == new_current || key == new_rnext) + continue; + + return tcp_ao_delete_key(sk, ao_info, key, + new_current, new_rnext); + } + return -ENOENT; +} + +static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *new_current = NULL, *new_rnext = NULL; + struct tcp_ao_info *ao_info; + struct tcp_ao_info_opt cmd; + bool first = false; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.reserved != 0) + return -EINVAL; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) { + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } + + /* For sockets in TCP_CLOSED it's possible set keys that aren't + * matching the future peer (address/port/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any. 
+ */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) { + err = -ENOENT; + goto out; + } + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) { + err = -ENOENT; + goto out; + } + } + + ao_info->ao_required = cmd.ao_required; + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + if (first) { + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + return 0; +out: + if (first) + kfree(ao_info); + return err; +} + +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen) +{ + if (WARN_ON_ONCE(family != AF_INET && family != AF_INET6)) + return -EAFNOSUPPORT; + + switch (cmd) { + case TCP_AO_ADD_KEY: + return tcp_ao_add_cmd(sk, family, optval, optlen); + case TCP_AO_DEL_KEY: + return tcp_ao_del_cmd(sk, family, optval, optlen); + case TCP_AO_INFO: + return tcp_ao_info_cmd(sk, family, optval, optlen); + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} + +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET, optval, optlen); +} + diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7d81e90b6f5c..a4746c27f38a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2271,11 +2271,16 @@ const struct inet_connection_sock_af_ops ipv4_specific = { }; EXPORT_SYMBOL(ipv4_specific); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v4_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_parse = tcp_v4_parse_ao, +#endif }; #endif @@ -2290,7 +2295,7 @@ static int tcp_v4_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv4_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; #endif @@ -2341,6 +2346,7 @@ void tcp_v4_destroy_sock(struct sock *sk) rcu_assign_pointer(tp->md5sig_info, NULL); } #endif + tcp_ao_destroy_sock(sk); /* Clean up a referenced TCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash) diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 3036a45e8a1e..d283c59df4c1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o obj-y += mcast_snoop.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o endif diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c new file mode 100644 index 000000000000..049ddbabe049 --- /dev/null +++ b/net/ipv6/tcp_ao.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. 
+ * + * Authors: Dmitry Safonov + * Francesco Ruggeri + * Salam Noureddine + */ +#include + +#include +#include + +int tcp_v6_parse_ao(struct sock *sk, int cmd, + sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ee53dad20a59..30bd17d03239 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; -#else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int l3index) -{ - return NULL; -} #endif /* Helper returning the inet6 address from a given tcp socket. @@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; @@ -769,7 +762,13 @@ clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } - +#else /* CONFIG_TCP_MD5SIG */ +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, + const struct in6_addr *addr, + int l3index) +{ + return NULL; +} #endif static void tcp_v6_init_req(struct request_sock *req, @@ -1228,7 +1227,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -1896,11 +1895,16 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .mtu_reduced = tcp_v6_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_parse = tcp_v6_parse_ao, +#endif }; #endif @@ -1922,11 +1926,16 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .mtu_reduced = tcp_v4_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_parse = tcp_v6_parse_ao, +#endif }; #endif @@ -1941,7 +1950,7 @@ static int tcp_v6_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = 
&tcp_sock_ipv6_specific; #endif -- cgit v1.2.3 From 0aadc73995d08f6b0dc061c14a564ffa46f5914e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:56 +0100 Subject: net/tcp: Prevent TCP-MD5 with TCP-AO being set Be as conservative as possible: if there is TCP-MD5 key for a given peer regardless of L3 interface - don't allow setting TCP-AO key for the same peer. According to RFC5925, TCP-AO is supposed to replace TCP-MD5 and there can't be any switch between both on any connected tuple. Later it can be relaxed, if there's a use, but in the beginning restrict any intersection. Note: it's still should be possible to set both TCP-MD5 and TCP-AO keys on a listening socket for *different* peers. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 43 +++++++++++++++++++++++++++++++++++++++++-- include/net/tcp_ao.h | 13 +++++++++++++ net/ipv4/tcp_ao.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 14 +++++++++++--- net/ipv4/tcp_output.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcp_ao.c | 17 +++++++++++++++++ net/ipv6/tcp_ipv6.c | 26 ++++++++++++++++++++++---- 7 files changed, 198 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index ff204471d451..0272117511ea 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1778,6 +1778,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); +void tcp_clear_md5_list(struct sock *sk); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1786,14 +1787,23 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family); + int family, bool any_l3index); static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; - return __tcp_md5_do_lookup(sk, l3index, addr, family); + return __tcp_md5_do_lookup(sk, l3index, addr, family, false); +} + +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return NULL; + return __tcp_md5_do_lookup(sk, 0, addr, family, true); } enum skb_drop_reason @@ -1811,6 +1821,13 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; } +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + return NULL; +} + static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, @@ -2177,6 +2194,9 @@ struct tcp_sock_af_ops { #endif #ifdef CONFIG_TCP_AO int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid); #endif }; @@ -2588,4 +2608,23 @@ static inline u64 tcp_transmit_time(const struct sock *sk) 
return 0; } +static inline bool tcp_ao_required(struct sock *sk, const void *saddr, + int family) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + struct tcp_ao_key *ao_key; + + ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return false; + + ao_key = tcp_ao_do_lookup(sk, saddr, family, -1, -1); + if (ao_info->ao_required || ao_key) + return true; +#endif + return false; +} + #endif /* _TCP_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a81e40fd255a..3c7f576376f9 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -92,11 +92,24 @@ struct tcp_ao_info { int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); void tcp_ao_destroy_sock(struct sock *sk); +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid); /* ipv4 specific functions */ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid); /* ipv6 specific functions */ int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, int sndid, int rcvid); #else +static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + const union tcp_ao_addr *addr, int family, int sndid, int rcvid) +{ + return NULL; +} + static inline void tcp_ao_destroy_sock(struct sock *sk) { } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 3c2d005a37ce..ee23356101f4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -116,6 +116,13 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, return NULL; } +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return __tcp_ao_do_lookup(sk, addr, family, U8_MAX, sndid, rcvid); +} + static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) { struct tcp_ao_info *ao; @@ -162,6 +169,14 @@ void tcp_ao_destroy_sock(struct sock *sk) kfree_rcu(ao, rcu); } +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid) +{ + union tcp_ao_addr *addr = (union tcp_ao_addr *)&addr_sk->sk_daddr; + + return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); +} + static bool tcp_ao_can_set_current_rnext(struct sock *sk) { /* There aren't current/rnext keys on TCP_LISTEN sockets */ @@ -497,6 +512,10 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, return -EINVAL; } + /* Don't allow keys for peers that have a matching TCP-MD5 key */ + if (tcp_md5_do_lookup_any_l3index(sk, addr, family)) + return -EKEYREJECTED; + ao_info = setsockopt_ao_info(sk); if (IS_ERR(ao_info)) return PTR_ERR(ao_info); @@ -698,6 +717,31 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, return -ENOENT; } +/* cmd.ao_required makes a socket TCP-AO only. + * Don't allow any md5 keys for any l3intf on the socket together with it. + * Restricting it early in setsockopt() removes a check for + * ao_info->ao_required on inbound tcp segment fast-path. 
+ */ +static int tcp_ao_required_verify(struct sock *sk) +{ +#ifdef CONFIG_TCP_MD5SIG + const struct tcp_md5sig_info *md5sig; + + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return 0; + + md5sig = rcu_dereference_check(tcp_sk(sk)->md5sig_info, + lockdep_sock_is_held(sk)); + if (!md5sig) + return 0; + + if (rcu_dereference_check(hlist_first_rcu(&md5sig->head), + lockdep_sock_is_held(sk))) + return 1; +#endif + return 0; +} + static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, sockptr_t optval, int optlen) { @@ -732,6 +776,9 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, first = true; } + if (cmd.ao_required && tcp_ao_required_verify(sk)) + return -EKEYREJECTED; + /* For sockets in TCP_CLOSED it's possible set keys that aren't * matching the future peer (address/port/VRF/etc), * tcp_ao_connect_init() will choose a correct matching MKT diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4746c27f38a..698e58a3ccec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1082,7 +1082,7 @@ static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key * /* Find the Key structure for an address. */ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family) + int family, bool any_l3index) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1101,7 +1101,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index) + if (!any_l3index && key->flags & TCP_MD5SIG_FLAG_IFINDEX && + key->l3index != l3index) continue; if (family == AF_INET) { mask = inet_make_mask(key->prefixlen); @@ -1313,7 +1314,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, } EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1383,6 +1384,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } @@ -2279,6 +2286,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_parse = tcp_v4_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v4_ao_lookup, .ao_parse = tcp_v4_parse_ao, #endif }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ca4d7594efd4..1b90107f7038 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3931,6 +3931,53 @@ int tcp_connect(struct sock *sk) tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); +#if defined(CONFIG_TCP_MD5SIG) && defined(CONFIG_TCP_AO) + /* Has to be checked late, after setting daddr/saddr/ops. + * Return error if the peer has both a md5 and a tcp-ao key + * configured as this is ambiguous. 
+ */ + if (unlikely(rcu_dereference_protected(tp->md5sig_info, + lockdep_sock_is_held(sk)))) { + bool needs_ao = !!tp->af_specific->ao_lookup(sk, sk, -1, -1); + bool needs_md5 = !!tp->af_specific->md5_lookup(sk, sk); + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao_info) { + /* This is an extra check: tcp_ao_required() in + * tcp_v{4,6}_parse_md5_keys() should prevent adding + * md5 keys on ao_required socket. + */ + needs_ao |= ao_info->ao_required; + WARN_ON_ONCE(ao_info->ao_required && needs_md5); + } + if (needs_md5 && needs_ao) + return -EKEYREJECTED; + + /* If we have a matching md5 key and no matching tcp-ao key + * then free up ao_info if allocated. + */ + if (needs_md5) { + tcp_ao_destroy_sock(sk); + } else if (needs_ao) { + tcp_clear_md5_list(sk); + kfree(rcu_replace_pointer(tp->md5sig_info, NULL, + lockdep_sock_is_held(sk))); + } + } +#endif +#ifdef CONFIG_TCP_AO + if (unlikely(rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)))) { + /* Don't allow connecting if ao is configured but no + * matching key is found. + */ + if (!tp->af_specific->ao_lookup(sk, sk, -1, -1)) + return -EKEYREJECTED; + } +#endif + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index 049ddbabe049..0640acaee67b 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -12,6 +12,23 @@ #include #include +static struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, + const struct in6_addr *addr, + int sndid, int rcvid) +{ + return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)addr, AF_INET6, + sndid, rcvid); +} + +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid) +{ + struct in6_addr *addr = &addr_sk->sk_v6_daddr; + + return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); +} + int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 30bd17d03239..70a3842f47fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -600,6 +600,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; u8 flags; @@ -654,13 +655,28 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { + addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; + + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); + } + + addr = (union tcp_md5_addr *)&sin6->sin6_addr; + + /* Don't allow keys for peers that have a matching TCP-AO key. 
+ * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET6)) + return -EKEYREJECTED; - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, flags, + return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } @@ -1903,6 +1919,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, .ao_parse = tcp_v6_parse_ao, #endif }; @@ -1934,6 +1951,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, .ao_parse = tcp_v6_parse_ao, #endif }; -- cgit v1.2.3 From 7c2ffaf21bd67f73d21560995ce17eaf5fc1d37f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:57 +0100 Subject: net/tcp: Calculate TCP-AO traffic keys Add traffic key calculation the way it's described in RFC5926. Wire it up to tcp_finish_connect() and cache the new keys straight away on already established TCP connections. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 3 + include/net/tcp_ao.h | 51 ++++++++++++- net/ipv4/tcp_ao.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 2 + net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 2 + net/ipv6/tcp_ao.c | 50 ++++++++++++ net/ipv6/tcp_ipv6.c | 1 + 8 files changed, 314 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0272117511ea..b72c46cf229b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2197,6 +2197,9 @@ struct tcp_sock_af_ops { struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); + int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); #endif }; diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 3c7f576376f9..b021a811511b 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -89,8 +89,32 @@ struct tcp_ao_info { }; #ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ + +struct tcp4_ao_context { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp6_ao_context { + struct in6_addr saddr; + struct in6_addr daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp_sigpool; + int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, @@ -99,11 +123,22 @@ struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); /* ipv6 specific functions */ -int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +int 
tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send); struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); -#else +int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +void tcp_ao_established(struct sock *sk); +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); +void tcp_ao_connect_init(struct sock *sk); + +#else /* CONFIG_TCP_AO */ + static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid) { @@ -113,6 +148,18 @@ static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, static inline void tcp_ao_destroy_sock(struct sock *sk) { } + +static inline void tcp_ao_established(struct sock *sk) +{ +} + +static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline void tcp_ao_connect_init(struct sock *sk) +{ +} #endif #endif /* _TCP_AO_H */ diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index ee23356101f4..e478341fc336 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -16,6 +16,34 @@ #include #include +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp) +{ + struct scatterlist sg; + int ret; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp->req), + mkt->key, mkt->keylen)) + goto clear_hash; + + ret = crypto_ahash_init(hp->req); + if (ret) + goto clear_hash; + + sg_init_one(&sg, ctx, len); + ahash_request_set_crypt(hp->req, &sg, key, len); + crypto_ahash_update(hp->req); + + ret = crypto_ahash_final(hp->req); + if (ret) + goto clear_hash; + + return 0; +clear_hash: + memset(key, 0, tcp_ao_digest_size(mkt)); + return 1; +} + /* Optimized version of tcp_ao_do_lookup(): only for sockets for which * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. 
@@ -169,6 +197,71 @@ void tcp_ao_destroy_sock(struct sock *sk) kfree_rcu(ao, rcu); } +/* 4 tuple and ISNs are expected in NBO */ +static int tcp_v4_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + /* See RFC5926 3.1.1 */ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp4_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = saddr; + tmp->ctx.daddr = daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (send) + return tcp_v4_ao_calc_key(mkt, key, sk->sk_rcv_saddr, + sk->sk_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v4_ao_calc_key(mkt, key, sk->sk_daddr, + sk->sk_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (mkt->family == AF_INET) + return tcp_v4_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#if IS_ENABLED(CONFIG_IPV6) + else if (mkt->family == AF_INET6) + return tcp_v6_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#endif + else + return -EOPNOTSUPP; +} + struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid) { @@ -177,6 +270,113 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); } +static int tcp_ao_cache_traffic_keys(const struct sock *sk, + struct tcp_ao_info *ao, + struct tcp_ao_key *ao_key) +{ + u8 *traffic_key = snd_other_key(ao_key); + int ret; + + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, true); + if (ret) + return ret; + + traffic_key = rcv_other_key(ao_key); + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, false); + return ret; +} + +void tcp_ao_connect_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + int family; + + ao_info = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return; + + /* Remove all keys that don't match the peer */ + family = sk->sk_family; + if (family == AF_INET) + addr = (union tcp_ao_addr *)&sk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + addr = (union tcp_ao_addr *)&sk->sk_v6_daddr; +#endif + else + return; + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (!tcp_ao_key_cmp(key, addr, key->prefixlen, family, -1, -1)) + continue; + + if (key == ao_info->current_key) + ao_info->current_key = NULL; + if (key == ao_info->rnext_key) + ao_info->rnext_key = NULL; + hlist_del_rcu(&key->node); + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + key = tp->af_specific->ao_lookup(sk, sk, -1, -1); + if (key) { + /* if current_key or rnext_key were not provided, + * use the first key 
matching the peer + */ + if (!ao_info->current_key) + ao_info->current_key = key; + if (!ao_info->rnext_key) + ao_info->rnext_key = key; + tp->tcp_header_len += tcp_ao_len(key); + + ao_info->lisn = htonl(tp->write_seq); + } else { + /* Can't happen: tcp_connect() verifies that there's + * at least one tcp-ao key that matches the remote peer. + */ + WARN_ON_ONCE(1); + rcu_assign_pointer(tp->ao_info, NULL); + kfree(ao_info); + } +} + +void tcp_ao_established(struct sock *sk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + static bool tcp_ao_can_set_current_rnext(struct sock *sk) { /* There aren't current/rnext keys on TCP_LISTEN sockets */ @@ -558,6 +758,12 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, if (ret < 0) goto err_free_sock; + /* Change this condition if we allow adding keys in states + * like close_wait, syn_sent or fin_wait... + */ + if (sk->sk_state == TCP_ESTABLISHED) + tcp_ao_cache_traffic_keys(sk, ao_info, key); + tcp_ao_link_mkt(ao_info, key); if (first) { sk_gso_disable(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 00d04ab68958..6ee0342b5338 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6151,6 +6151,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + tcp_ao_finish_connect(sk, skb); tcp_set_state(sk, TCP_ESTABLISHED); icsk->icsk_ack.lrcvtime = tcp_jiffies32; @@ -6648,6 +6649,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) skb); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); } + tcp_ao_established(sk); smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 698e58a3ccec..3c73b5829377 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2288,6 +2288,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v4_ao_lookup, .ao_parse = tcp_v4_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, #endif }; #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1b90107f7038..9fbf1b2e2025 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3749,6 +3749,8 @@ static void tcp_connect_init(struct sock *sk) if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + tcp_ao_connect_init(sk); + /* If user gave his TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index 0640acaee67b..9ab594fadbd9 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -12,6 +12,56 @@ #include #include +static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + struct kdf_input_block { + u8 counter; + u8 label[6]; + 
struct tcp6_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = *saddr; + tmp->ctx.daddr = *daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send) +{ + if (send) + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr, + &sk->sk_v6_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + static struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, const struct in6_addr *addr, int sndid, int rcvid) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70a3842f47fa..074e16fe00e0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1921,6 +1921,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, #endif }; #endif -- cgit v1.2.3 From 1e03d32bea8e782b7d31769c25a5fae8a5044488 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:58 +0100 Subject: net/tcp: Add TCP-AO sign to outgoing packets Using precalculated traffic keys, sign TCP segments as prescribed by RFC5925. Per RFC, TCP header options are included in sign calculation: "The TCP header, by default including options, and where the TCP checksum and TCP-AO MAC fields are set to zero, all in network- byte order." (5.1.3) tcp_ao_hash_header() has exclude_options parameter to optionally exclude TCP header from hash calculation, as described in RFC5925 (9.1), this is needed for interaction with middleboxes that may change "some TCP options". This is wired up to AO key flags and setsockopt() later. Similarly to TCP-MD5 hash TCP segment fragments. From this moment a user can start sending TCP-AO signed segments with one of crypto ahash algorithms from supported by Linux kernel. It can have a user-specified MAC length, to either save TCP option header space or provide higher protection using a longer signature. The inbound segments are not yet verified, TCP-AO option is ignored and they are accepted. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 64 ++++++++++++++++ include/net/tcp_ao.h | 23 ++++++ net/ipv4/tcp_ao.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 112 ++++++++++++++++++---------- net/ipv6/tcp_ao.c | 28 +++++++ net/ipv6/tcp_ipv6.c | 2 + 7 files changed, 391 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b72c46cf229b..96e83159f1be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -195,6 +195,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ @@ -2200,6 +2201,9 @@ struct tcp_sock_af_ops { int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); + int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); #endif }; @@ -2253,6 +2257,66 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, } #endif +struct tcp_key { + union { + struct tcp_ao_key *ao_key; + struct tcp_md5sig_key *md5_key; + }; + enum { + TCP_KEY_NONE = 0, + TCP_KEY_MD5, + TCP_KEY_AO, + } type; +}; + +static inline void tcp_get_current_key(const struct sock *sk, + struct tcp_key *out) +{ +#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) + const struct tcp_sock *tp = tcp_sk(sk); +#endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held(sk)); + if (ao) { + out->ao_key = READ_ONCE(ao->current_key); + out->type = TCP_KEY_AO; + return; + } +#endif +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + rcu_access_pointer(tp->md5sig_info)) { + out->md5_key = tp->af_specific->md5_lookup(sk, sk); + if (out->md5_key) { + out->type = TCP_KEY_MD5; + return; + } + } +#endif + out->type = TCP_KEY_NONE; +} + +static inline bool tcp_key_is_md5(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + key->type == TCP_KEY_MD5) + return true; +#endif + return false; +} + +static inline bool tcp_key_is_ao(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_AO + if (key->type == TCP_KEY_AO) + return true; +#endif + return false; +} + int tcpv4_offload_init(void); void tcp_v4_init(void); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index b021a811511b..0b86bc05d8cf 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -111,6 +111,13 @@ struct tcp6_ao_context { struct tcp_sigpool; +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location); +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, @@ -126,12 +133,21 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, 
u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); /* ipv6 specific functions */ +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes); int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); void tcp_ao_established(struct sock *sk); void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); @@ -139,6 +155,13 @@ void tcp_ao_connect_init(struct sock *sk); #else /* CONFIG_TCP_AO */ +static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + return 0; +} + static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid) { diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index e478341fc336..007f29a2531f 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -262,6 +262,171 @@ static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, return -EOPNOTSUPP; } +static int tcp_v4_ao_hash_pseudoheader(struct tcp_sigpool *hp, + __be32 daddr, __be32 saddr, + int nbytes) +{ + struct tcp4_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + bp->saddr = saddr; + bp->daddr = daddr; + bp->pad = 0; + bp->protocol = IPPROTO_TCP; + bp->len = cpu_to_be16(nbytes); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_pseudoheader(unsigned short int family, + const struct sock *sk, + const struct sk_buff *skb, + struct tcp_sigpool *hp, int nbytes) +{ + const struct tcphdr *th = tcp_hdr(skb); + + /* TODO: Can we rely on checksum being zero to mean outbound pkt? 
*/ + if (!th->check) { + if (family == AF_INET) + return tcp_v4_ao_hash_pseudoheader(hp, sk->sk_daddr, + sk->sk_rcv_saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_hash_pseudoheader(hp, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, skb->len); +#endif + else + return -EAFNOSUPPORT; + } + + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_v4_ao_hash_pseudoheader(hp, iph->daddr, + iph->saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_v6_ao_hash_pseudoheader(hp, &iph->daddr, + &iph->saddr, skb->len); +#endif + } + return -EAFNOSUPPORT; +} + +/* tcp_ao_hash_sne(struct tcp_sigpool *hp) + * @hp - used for hashing + * @sne - sne value + */ +static int tcp_ao_hash_sne(struct tcp_sigpool *hp, u32 sne) +{ + struct scatterlist sg; + __be32 *bp; + + bp = (__be32 *)hp->scratch; + *bp = htonl(sne); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_header(struct tcp_sigpool *hp, + const struct tcphdr *th, + bool exclude_options, u8 *hash, + int hash_offset, int hash_len) +{ + int err, len = th->doff << 2; + struct scatterlist sg; + u8 *hdr = hp->scratch; + + /* We are not allowed to change tcphdr, make a local copy */ + if (exclude_options) { + len = sizeof(*th) + sizeof(struct tcp_ao_hdr) + hash_len; + memcpy(hdr, th, sizeof(*th)); + memcpy(hdr + sizeof(*th), + (u8 *)th + hash_offset - sizeof(struct tcp_ao_hdr), + sizeof(struct tcp_ao_hdr)); + memset(hdr + sizeof(*th) + sizeof(struct tcp_ao_hdr), + 0, hash_len); + ((struct tcphdr *)hdr)->check = 0; + } else { + len = th->doff << 2; + memcpy(hdr, th, len); + /* zero out tcp-ao hash */ + ((struct tcphdr *)hdr)->check = 0; + memset(hdr + hash_offset, 0, hash_len); + } + + sg_init_one(&sg, hdr, len); + ahash_request_set_crypt(hp->req, &sg, NULL, len); + err = crypto_ahash_update(hp->req); + WARN_ON_ONCE(err != 0); + return err; +} + +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + const struct tcphdr *th = tcp_hdr(skb); + int tkey_len = tcp_ao_digest_size(key); + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + /* For now use sha1 by default. 
Depends on alg in tcp_ao_key */ + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (tcp_ao_hash_pseudoheader(family, sk, skb, &hp, skb->len)) + goto clear_hash; + if (tcp_ao_hash_header(&hp, th, false, + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET, ao_hash, key, sk, skb, + tkey, hash_offset, sne); +} + struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid) { @@ -270,6 +435,40 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); } +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao; + void *tkey_buf = NULL; + u8 *traffic_key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + traffic_key = snd_other_key(key); + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { + __be32 disn; + + if (!(tcb->tcp_flags & TCPHDR_ACK)) { + disn = 0; + tkey_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!tkey_buf) + return -ENOMEM; + traffic_key = tkey_buf; + } else { + disn = ao->risn; + } + tp->af_specific->ao_calc_key_sk(key, traffic_key, + sk, ao->lisn, disn, true); + } + tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key, + hash_location - (u8 *)th, 0); + kfree(tkey_buf); + return 0; +} + static int tcp_ao_cache_traffic_keys(const struct sock *sk, struct tcp_ao_info *ao, struct tcp_ao_key *ao_key) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3c73b5829377..b002f6497d19 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2287,6 +2287,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v4_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, .ao_parse = tcp_v4_parse_ao, .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9fbf1b2e2025..fa3a630f3629 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -422,6 +422,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_FAST_OPEN_COOKIE BIT(8) #define OPTION_SMC BIT(9) #define OPTION_MPTCP BIT(10) +#define OPTION_AO BIT(11) static void smc_options_write(__be32 *ptr, u16 *options) { @@ -614,19 +615,43 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, * (but it may well be that other scenarios fail similarly). 
*/ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, - struct tcp_out_options *opts) + struct tcp_out_options *opts, + struct tcp_key *key) { __be32 *ptr = (__be32 *)(th + 1); u16 options = opts->options; /* mungable copy */ - if (unlikely(OPTION_MD5 & options)) { + if (tcp_key_is_md5(key)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; - } + } else if (tcp_key_is_ao(key)) { +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + u8 maclen; + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + goto out_ao; + maclen = tcp_ao_maclen(key->ao_key); + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +out_ao: +#endif + } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | @@ -767,23 +792,25 @@ static void mptcp_set_option_cond(const struct request_sock *req, */ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; + bool timestamps; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - remaining -= TCPOLEN_MD5SIG_ALIGNED; + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + timestamps = false; + opts->options |= OPTION_MD5; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + } else { + timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); + if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len(key->ao_key); } } -#endif /* We always get an MSS option. 
The option bytes which will be seen in * normal data packets should timestamps be used, must be in the MSS @@ -797,7 +824,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { + if (likely(timestamps)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; @@ -922,7 +949,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; @@ -930,17 +957,14 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->options = 0; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; - } + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + size += tcp_ao_len(key->ao_key); } -#endif if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; @@ -1245,7 +1269,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; struct sk_buff *oskb = NULL; - struct tcp_md5sig_key *md5; + struct tcp_key key; struct tcphdr *th; u64 prior_wstamp; int err; @@ -1277,11 +1301,11 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); + tcp_get_current_key(sk, &key); if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { - tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); + tcp_options_size = tcp_syn_options(sk, skb, &opts, &key); } else { - tcp_options_size = tcp_established_options(sk, skb, &opts, - &md5); + tcp_options_size = tcp_established_options(sk, skb, &opts, &key); /* Force a PSH flag on all (GSO) packets to expedite GRO flush * at receiver : This slightly improve GRO performance. 
* Note that we do not force the PSH flag for non GSO packets, @@ -1362,16 +1386,25 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->window = htons(min(tp->rcv_wnd, 65535U)); } - tcp_options_write(th, tp, &opts); + tcp_options_write(th, tp, &opts, &key); + if (tcp_key_is_md5(&key)) { #ifdef CONFIG_TCP_MD5SIG - /* Calculate the MD5 hash, as we have all we need now */ - if (md5) { + /* Calculate the MD5 hash, as we have all we need now */ sk_gso_disable(sk); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, skb); - } + key.md5_key, sk, skb); #endif + } else if (tcp_key_is_ao(&key)) { + int err; + + err = tcp_ao_transmit_skb(sk, skb, key.ao_key, th, + opts.hash_location); + if (err) { + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); + return -ENOMEM; + } + } /* BPF prog is the last one writing header option */ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); @@ -1804,7 +1837,7 @@ unsigned int tcp_current_mss(struct sock *sk) u32 mss_now; unsigned int header_len; struct tcp_out_options opts; - struct tcp_md5sig_key *md5; + struct tcp_key key; mss_now = tp->mss_cache; @@ -1813,8 +1846,8 @@ unsigned int tcp_current_mss(struct sock *sk) if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } - - header_len = tcp_established_options(sk, NULL, &opts, &md5) + + tcp_get_current_key(sk, &key); + header_len = tcp_established_options(sk, NULL, &opts, &key) + sizeof(struct tcphdr); /* The mss_cache is sized based on tp->tcp_header_len, which assumes * some common options. If this is an odd packet (because we have SACK @@ -3613,6 +3646,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *md5 = NULL; struct tcp_out_options opts; + struct tcp_key key = {}; struct sk_buff *skb; int tcp_header_size; struct tcphdr *th; @@ -3667,6 +3701,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); + if (md5) + key.type = TCP_KEY_MD5; #endif skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4); /* bpf program will be interested in the tcp_flags */ @@ -3693,7 +3729,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write(th, NULL, &opts); + tcp_options_write(th, NULL, &opts, &key); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index 9ab594fadbd9..d08735b6f3c5 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -7,6 +7,7 @@ * Francesco Ruggeri * Salam Noureddine */ +#include #include #include @@ -79,6 +80,33 @@ struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); } +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes) +{ + struct tcp6_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + /* 1. 
TCP pseudo-header (RFC2460) */ + bp->saddr = *saddr; + bp->daddr = *daddr; + bp->len = cpu_to_be32(nbytes); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey, + hash_offset, sne); +} + int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 074e16fe00e0..301b2498d793 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1920,6 +1920,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v6_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, #endif @@ -1953,6 +1954,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, #endif }; -- cgit v1.2.3 From f7dca36fc54afa2eb76bff8d0589a2ef18caea91 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:21:59 +0100 Subject: net/tcp: Add tcp_parse_auth_options() Introduce a helper that: (1) shares the common code with TCP-MD5 header options parsing (2) looks for hash signature only once for both TCP-MD5 and TCP-AO (3) fails with -EEXIST if any TCP sign option is present twice, see RFC5925 (2.2): ">> A single TCP segment MUST NOT have more than one TCP-AO in its options sequence. When multiple TCP-AOs appear, TCP MUST discard the segment." Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 6 ++++++ include/net/tcp.h | 24 +++++++++++++++++++++++- include/net/tcp_ao.h | 17 ++++++++++++++++- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_input.c | 39 +++++++++++++++++++++++++++++---------- net/ipv4/tcp_ipv4.c | 15 ++++++++++----- net/ipv6/tcp_ipv6.c | 11 +++++++---- 7 files changed, 93 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 845dce805de7..3af4464a9c5b 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -20,6 +20,7 @@ FN(IP_NOPROTO) \ FN(SOCKET_RCVBUFF) \ FN(PROTO_MEM) \ + FN(TCP_AUTH_HDR) \ FN(TCP_MD5NOTFOUND) \ FN(TCP_MD5UNEXPECTED) \ FN(TCP_MD5FAILURE) \ @@ -142,6 +143,11 @@ enum skb_drop_reason { * drop out of udp_memory_allocated. */ SKB_DROP_REASON_PROTO_MEM, + /** + * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met + * twice or set incorrectly. 
+ */ + SKB_DROP_REASON_TCP_AUTH_HDR, /** * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, * corresponding to LINUX_MIB_TCPMD5NOTFOUND diff --git a/include/net/tcp.h b/include/net/tcp.h index 96e83159f1be..423807ae3e37 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -438,7 +438,6 @@ int tcp_mmap(struct file *file, struct socket *sock, void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * BPF SKB-less helpers @@ -2675,6 +2674,29 @@ static inline u64 tcp_transmit_time(const struct sock *sk) return 0; } +static inline int tcp_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const struct tcp_ao_hdr **aoh) +{ + const u8 *md5_tmp, *ao_tmp; + int ret; + + ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp); + if (ret) + return ret; + + if (md5_hash) + *md5_hash = md5_tmp; + + if (aoh) { + if (!ao_tmp) + *aoh = NULL; + else + *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2); + } + + return 0; +} + static inline bool tcp_ao_required(struct sock *sk, const void *saddr, int family) { diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 0b86bc05d8cf..fdd2f5091b98 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -152,7 +152,9 @@ int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); void tcp_ao_established(struct sock *sk); void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); void tcp_ao_connect_init(struct sock *sk); - +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family); #else /* CONFIG_TCP_AO */ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, @@ -185,4 +187,17 @@ static inline void tcp_ao_connect_init(struct sock *sk) } #endif +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash); +#else +static inline int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + *md5_hash = NULL; + *ao_hash = NULL; + return 0; +} +#endif + #endif /* _TCP_AO_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b6faee8a1e67..369e2a41bc1b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4398,7 +4398,8 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, l3index = sdif ? dif : 0; hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family); - hash_location = tcp_parse_md5sig_option(th); + if (tcp_parse_auth_options(th, &hash_location, NULL)) + return SKB_DROP_REASON_TCP_AUTH_HDR; /* We've parsed the options - do we have a hash? 
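A self-contained userspace sketch (not from the patches) of the single-pass option walk that tcp_do_parse_auth_options() implements: it records at most one signature option, whether TCP-MD5 (kind 19) or TCP-AO (kind 29), and fails if a second one shows up, matching the RFC 5925 (2.2) requirement quoted in the commit message.

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1
#define TCPOPT_MD5SIG	19
#define TCPOPT_AO	29

static int parse_auth_options(const uint8_t *opt, size_t len,
			      const uint8_t **md5, const uint8_t **ao)
{
	*md5 = NULL;
	*ao = NULL;

	while (len >= 2) {
		uint8_t kind = opt[0];
		uint8_t size;

		if (kind == TCPOPT_EOL)
			return 0;
		if (kind == TCPOPT_NOP) {
			opt++;
			len--;
			continue;
		}
		size = opt[1];
		if (size < 2 || size > len)
			return -EINVAL;
		if (kind == TCPOPT_MD5SIG || kind == TCPOPT_AO) {
			if (*md5 || *ao)	/* at most one signature option */
				return -EEXIST;
			if (kind == TCPOPT_MD5SIG) {
				if (size != 18)	/* kind + len + 16-byte digest */
					return -EINVAL;
				*md5 = opt + 2;	/* points at the digest */
			} else {
				if (size <= 4)	/* AO header but no MAC */
					return -EINVAL;
				*ao = opt;	/* points at the AO header */
			}
		}
		opt += size;
		len -= size;
	}
	return 0;
}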
*/ if (!hash_expected && !hash_location) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6ee0342b5338..fc42b172abf6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4255,39 +4255,58 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) /* - * Parse MD5 Signature option + * Parse Signature options */ -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) { int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); + unsigned int minlen = TCPOLEN_MD5SIG; + + if (IS_ENABLED(CONFIG_TCP_AO)) + minlen = sizeof(struct tcp_ao_hdr) + 1; + + *md5_hash = NULL; + *ao_hash = NULL; /* If not enough data remaining, we can short cut */ - while (length >= TCPOLEN_MD5SIG) { + while (length >= minlen) { int opcode = *ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: - return NULL; + return 0; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2 || opsize > length) - return NULL; - if (opcode == TCPOPT_MD5SIG) - return opsize == TCPOLEN_MD5SIG ? ptr : NULL; + return -EINVAL; + if (opcode == TCPOPT_MD5SIG) { + if (opsize != TCPOLEN_MD5SIG) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *md5_hash = ptr; + } else if (opcode == TCPOPT_AO) { + if (opsize <= sizeof(struct tcp_ao_hdr)) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *ao_hash = ptr; + } } ptr += opsize - 2; length -= opsize; } - return NULL; + return 0; } -EXPORT_SYMBOL(tcp_parse_md5sig_option); +EXPORT_SYMBOL(tcp_do_parse_auth_options); #endif /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b002f6497d19..83e069d0f778 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -670,7 +670,9 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -#ifdef CONFIG_TCP_MD5SIG +#ifdef CONFIG_TCP_AO +#define OPTION_BYTES MAX_TCP_OPTION_SPACE +#elif defined(CONFIG_TCP_MD5SIG) #define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED #else #define OPTION_BYTES sizeof(__be32) @@ -685,8 +687,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG + const __u8 *md5_hash_location = NULL; struct tcp_md5sig_key *key = NULL; - const __u8 *hash_location = NULL; unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; @@ -727,8 +729,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, NULL)) + return; + rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { const union tcp_md5_addr *addr; int l3index; @@ -739,7 +744,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) l3index = tcp_v4_sdif(skb) ? 
inet_iif(skb) : 0; addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - } else if (hash_location) { + } else if (md5_hash_location) { const union tcp_md5_addr *addr; int sdif = tcp_v4_sdif(skb); int dif = inet_iif(skb); @@ -771,7 +776,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 301b2498d793..5dd016bdf44b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -990,7 +990,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; #ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; + const __u8 *md5_hash_location = NULL; unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; @@ -1012,8 +1012,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(th, &md5_hash_location, NULL)) + return; + rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { int l3index; @@ -1022,7 +1025,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); - } else if (hash_location) { + } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; @@ -1051,7 +1054,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) goto out; genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif -- cgit v1.2.3 From ba7783ad45c8f0fb7a70640f6b6fcdc54ed48412 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:00 +0100 Subject: net/tcp: Add AO sign to RST packets Wire up sending resets to TCP-AO hashing. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 7 +++- include/net/tcp_ao.h | 12 ++++++ net/ipv4/tcp_ao.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_ipv4.c | 69 +++++++++++++++++++++++++++------- net/ipv6/tcp_ipv6.c | 98 +++++++++++++++++++++++++++++++++++-------------- 5 files changed, 245 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 423807ae3e37..17b3ecd2760f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2258,7 +2258,12 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, struct tcp_key { union { - struct tcp_ao_key *ao_key; + struct { + struct tcp_ao_key *ao_key; + char *traffic_key; + u32 sne; + u8 rcv_next; + }; struct tcp_md5sig_key *md5_key; }; enum { diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index fdd2f5091b98..629ab0365b83 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -120,12 +120,24 @@ int tcp_ao_hash_skb(unsigned short int family, const u8 *tkey, int hash_offset, u32 sne); int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid); int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne); +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne); + /* ipv4 specific functions */ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 007f29a2531f..b8afe78ff057 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -48,8 +48,8 @@ clear_hash: * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. 
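A minimal sketch (not from the patches) of the pattern behind the struct tcp_key union added above: one object carries either an MD5 key or an AO key plus its per-segment state, and the transmit/reply paths dispatch on its type instead of on separate pointers. The md5_key/ao_key structs here are placeholders, not the kernel types.

#include <stdint.h>
#include <stdio.h>

struct md5_key { int id; };			/* placeholder */
struct ao_key { int sndid; };			/* placeholder */

struct tcp_key {
	union {
		struct {
			struct ao_key *ao_key;
			char *traffic_key;
			uint32_t sne;
			uint8_t rcv_next;
		};
		struct md5_key *md5_key;
	};
	enum { TCP_KEY_NONE = 0, TCP_KEY_MD5, TCP_KEY_AO } type;
};

static void sign_segment(const struct tcp_key *key)
{
	switch (key->type) {
	case TCP_KEY_MD5:
		printf("MD5 sign with key %d\n", key->md5_key->id);
		break;
	case TCP_KEY_AO:
		printf("AO sign, sndid %d, sne %u\n",
		       key->ao_key->sndid, (unsigned int)key->sne);
		break;
	default:
		break;				/* unsigned segment */
	}
}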
*/ -static struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, - int sndid, int rcvid) +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid) { struct tcp_ao_key *key; @@ -369,6 +369,66 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp, return err; } +int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne) +{ + int tkey_len = tcp_ao_digest_size(key); + int hash_offset = ao_hash - (char *)th; + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (family == AF_INET) { + if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr, + saddr->a4.s_addr, th->doff * 4)) + goto clear_hash; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6, + &saddr->a6, th->doff * 4)) + goto clear_hash; +#endif + } else { + WARN_ON_ONCE(1); + goto clear_hash; + } + if (tcp_ao_hash_header(&hp, th, false, + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + int tcp_ao_hash_skb(unsigned short int family, char *ao_hash, struct tcp_ao_key *key, const struct sock *sk, const struct sk_buff *skb, @@ -435,6 +495,44 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); } +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne) +{ + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + *allocated_traffic_key = false; + /* If there's no socket - than initial sisn/disn are unknown. + * Drop the segment. RFC5925 (7.7) advises to require graceful + * restart [RFC4724]. Alternatively, the RFC5925 advises to + * save/restore traffic keys before/after reboot. + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR + * options to restore a socket post-reboot. 
+ */ + if (!sk) + return -ENOTCONN; + + if ((1 << sk->sk_state) & + (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT)) + return -1; + + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao_info) + return -ENOENT; + + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + if (!*key) + return -ENOENT; + *traffic_key = snd_other_key(*key); + rnext_key = READ_ONCE(ao_info->rnext_key); + *keyid = rnext_key->rcvid; + *sne = 0; + + return 0; +} + int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_ao_key *key, struct tcphdr *th, __u8 *hash_location) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 83e069d0f778..71e1cbb0020b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -657,6 +657,52 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) + +static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, + struct ip_reply_arg *arg, struct tcphdr *reply, + __be32 reply_options[REPLY_OPTIONS_LEN]) +{ +#ifdef CONFIG_TCP_AO + int sdif = tcp_v4_sdif(skb); + int dif = inet_iif(skb); + int l3index = sdif ? dif : 0; + bool allocated_traffic_key; + struct tcp_ao_key *key; + char *traffic_key; + bool drop = true; + u32 ao_sne = 0; + u8 keyid; + + rcu_read_lock(); + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + &key, &traffic_key, &allocated_traffic_key, + &keyid, &ao_sne)) + goto out; + + reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | + (aoh->rnext_keyid << 8) | keyid); + arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + reply->doff = arg->iov[0].iov_len / 4; + + if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], + key, traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + reply, ao_sne)) + goto out; + drop = false; +out: + rcu_read_unlock(); + if (allocated_traffic_key) + kfree(traffic_key); + return drop; +#else + return true; +#endif +} + /* * This routine will send an RST to the other tcp. * @@ -670,28 +716,21 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -#ifdef CONFIG_TCP_AO -#define OPTION_BYTES MAX_TCP_OPTION_SPACE -#elif defined(CONFIG_TCP_MD5SIG) -#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED -#else -#define OPTION_BYTES sizeof(__be32) -#endif - static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[OPTION_BYTES / sizeof(__be32)]; + __be32 opt[REPLY_OPTIONS_LEN]; } rep; + const __u8 *md5_hash_location = NULL; + const struct tcp_ao_hdr *aoh; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - const __u8 *md5_hash_location = NULL; struct tcp_md5sig_key *key = NULL; unsigned char newhash[16]; - int genhash; struct sock *sk1 = NULL; + int genhash; #endif u64 transmit_time = 0; struct sock *ctl_sk; @@ -728,11 +767,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ - if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, NULL)) + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) return; + if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt)) + return; + +#ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); if (sk && sk_fullsock(sk)) { const union tcp_md5_addr *addr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5dd016bdf44b..45b81e230438 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,8 +854,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label, u32 priority, u32 txhash) + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -870,13 +870,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) + if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + if (tcp_key_is_ao(key)) + tot_len += tcp_ao_len(key->ao_key); #ifdef CONFIG_MPTCP - if (rst && !key) { + if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) @@ -917,14 +917,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_md5_hash_hdr((__u8 *)topt, key, + tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + *topt++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (key->rcv_next)); + + tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, + key->traffic_key, + (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, + (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, + t1, key->sne); + } +#endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; @@ -987,19 +1001,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 seq = 0, ack_seq = 0; - struct tcp_md5sig_key *key = NULL; -#ifdef CONFIG_TCP_MD5SIG const __u8 *md5_hash_location = NULL; - unsigned char newhash[16]; - int genhash; - struct sock *sk1 = NULL; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + bool allocated_traffic_key = false; #endif + const struct tcp_ao_hdr *aoh; + struct tcp_key key = {}; + u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; +#ifdef CONFIG_TCP_MD5SIG + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -1011,12 +1029,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG /* Invalid TCP option size or twice included auth */ - if (tcp_parse_auth_options(th, &md5_hash_location, NULL)) + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; - +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); +#endif +#ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; @@ -1024,7 +1043,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); @@ -1049,11 +1070,12 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? dif : 0; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); - if (!key) + key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); + if (!key.md5_key) goto out; + key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } @@ -1065,6 +1087,20 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); +#ifdef CONFIG_TCP_AO + if (aoh) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + &key.ao_key, &key.traffic_key, + &allocated_traffic_key, + &key.rcv_next, &key.sne)) + goto out; + key.type = TCP_KEY_AO; + } +#endif + if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { @@ -1084,22 +1120,30 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, + ipv6_get_dsfield(ipv6h), label, priority, txhash, + &key); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: + if (allocated_traffic_key) + kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, + struct tcp_md5sig_key *md5_key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label, priority, txhash); + struct tcp_key key = { + .md5_key = md5_key, + .type = md5_key ? TCP_KEY_MD5 : TCP_KEY_NONE, + }; + + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, + tclass, label, priority, txhash, &key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From decde2586b34b99684faff1eab41e5c496c27fb6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:01 +0100 Subject: net/tcp: Add TCP-AO sign to twsk Add support for sockets in time-wait state. ao_info as well as all keys are inherited on transition to time-wait socket. 
The lifetime of ao_info is now protected by ref counter, so that tcp_ao_destroy_sock() will destruct it only when the last user is gone. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++ include/net/tcp_ao.h | 11 ++++-- net/ipv4/tcp_ao.c | 49 +++++++++++++++++++++----- net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++---------- net/ipv4/tcp_minisocks.c | 4 ++- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 72 +++++++++++++++++++++++++++---------- 7 files changed, 183 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 64e7b560fa79..eec6e7e5312e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -514,6 +514,9 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 629ab0365b83..971d7edcda9c 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -85,6 +85,7 @@ struct tcp_ao_info { __unused :31; __be32 lisn; __be32 risn; + refcount_t refcnt; /* Protects twsk destruction */ struct rcu_head rcu; }; @@ -124,7 +125,8 @@ struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, int sndid, int rcvid); int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); -void tcp_ao_destroy_sock(struct sock *sk); +void tcp_ao_destroy_sock(struct sock *sk, bool twsk); +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); @@ -182,7 +184,7 @@ static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, return NULL; } -static inline void tcp_ao_destroy_sock(struct sock *sk) +static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk) { } @@ -194,6 +196,11 @@ static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) { } +static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, + struct tcp_sock *tp) +{ +} + static inline void tcp_ao_connect_init(struct sock *sk) { } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index b8afe78ff057..7c4e2f42845a 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -159,6 +159,7 @@ static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) if (!ao) return NULL; INIT_HLIST_HEAD(&ao->head); + refcount_set(&ao->refcnt, 1); return ao; } @@ -176,27 +177,54 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk) +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) { struct tcp_ao_info *ao; struct tcp_ao_key *key; struct hlist_node *n; - ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + if (twsk) { + ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); + tcp_twsk(sk)->ao_info = NULL; + } else { + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); + tcp_sk(sk)->ao_info = NULL; + } - if (!ao) + if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; hlist_for_each_entry_safe(key, n, &ao->head, node) { hlist_del_rcu(&key->node); - atomic_sub(tcp_ao_sizeof_key(key), 
&sk->sk_omem_alloc); + if (!twsk) + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); call_rcu(&key->rcu, tcp_ao_key_free_rcu); } kfree_rcu(ao, rcu); } +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) +{ + struct tcp_ao_info *ao_info = rcu_dereference_protected(tp->ao_info, 1); + + if (ao_info) { + struct tcp_ao_key *key; + struct hlist_node *n; + int omem = 0; + + hlist_for_each_entry_safe(key, n, &ao_info->head, node) { + omem += tcp_ao_sizeof_key(key); + } + + refcount_inc(&ao_info->refcnt); + atomic_sub(omem, &(((struct sock *)tp)->sk_omem_alloc)); + rcu_assign_pointer(tcptw->ao_info, ao_info); + } else { + tcptw->ao_info = NULL; + } +} + /* 4 tuple and ISNs are expected in NBO */ static int tcp_v4_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, __be32 saddr, __be32 daddr, @@ -514,11 +542,13 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, if (!sk) return -ENOTCONN; - if ((1 << sk->sk_state) & - (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT)) + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { return -1; - ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (sk->sk_state == TCP_TIME_WAIT) + ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); + else + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); if (!ao_info) return -ENOENT; @@ -910,6 +940,9 @@ static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) if (sk_fullsock(sk)) { return rcu_dereference_protected(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk)); + } else if (sk->sk_state == TCP_TIME_WAIT) { + return rcu_dereference_protected(tcp_twsk(sk)->ao_info, + lockdep_sock_is_held(sk)); } return ERR_PTR(-ESOCKTNOSUPPORT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 71e1cbb0020b..a78112d78d06 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -911,17 +911,13 @@ out: static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, + struct tcp_key *key, int reply_flags, u8 tos, u32 txhash) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) -#ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -#endif - ]; + __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)]; } rep; struct net *net = sock_net(sk); struct ip_reply_arg arg; @@ -952,7 +948,7 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | @@ -963,9 +959,27 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->saddr, + key->md5_key, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &rep.th); } +#endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + int offset = (tsecr) ? 
3 : 0; + + rep.opt[offset++] = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + key->rcv_next); + arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4); + rep.th.doff = arg.iov[0].iov_len / 4; + + tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], + key->ao_key, key->traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + &rep.th, key->sne); + } #endif arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, @@ -999,18 +1013,50 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) { + inet_twsk_put(tw); + return; + } + + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), tcptw->tw_ts_recent, - tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw), + tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, tw->tw_tos, - tw->tw_txhash - ); + tw->tw_txhash); inet_twsk_put(tw); } @@ -1018,8 +1064,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - const union tcp_md5_addr *addr; - int l3index; + struct tcp_key key = {}; /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -1032,15 +1077,24 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; - l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key)) { + const union tcp_md5_addr *addr; + int l3index; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); + if (key.md5_key) + key.type = TCP_KEY_MD5; + } +#endif tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), - 0, - tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + 0, &key, inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos, READ_ONCE(tcp_rsk(req)->txhash)); @@ -2404,7 +2458,7 @@ void tcp_v4_destroy_sock(struct sock *sk) rcu_assign_pointer(tp->md5sig_info, NULL); } #endif - tcp_ao_destroy_sock(sk); + tcp_ao_destroy_sock(sk, false); /* Clean up a referenced TCP bind bucket. 
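A self-contained sketch (not from the patches) of the reference-counting protocol that now protects ao_info: the time-wait socket takes a reference in tcp_ao_time_wait(), and tcp_ao_destroy_sock() only frees the structure once both the full socket and the twsk have dropped theirs. C11 atomics stand in for the kernel's refcount_t.

#include <stdatomic.h>
#include <stdlib.h>

struct ao_info {
	atomic_int refcnt;
	/* keys, ISNs, rnext/current key pointers, ... */
};

static struct ao_info *ao_alloc(void)
{
	struct ao_info *ao = calloc(1, sizeof(*ao));

	if (ao)
		atomic_init(&ao->refcnt, 1);	/* reference held by the full socket */
	return ao;
}

static void ao_get(struct ao_info *ao)		/* transition to time-wait */
{
	atomic_fetch_add(&ao->refcnt, 1);
}

static void ao_put(struct ao_info *ao)		/* sk or twsk destruction */
{
	if (atomic_fetch_sub(&ao->refcnt, 1) == 1)
		free(ao);
}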
*/ if (inet_csk(sk)->icsk_bind_hash) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3dcb3fc36e64..6810cf65a322 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -279,7 +279,7 @@ out_free: void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_timewait_sock *tw; @@ -316,6 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #endif tcp_time_wait_init(sk, tcptw); + tcp_ao_time_wait(tcptw, tp); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -370,6 +371,7 @@ void tcp_twsk_destructor(struct sock *sk) call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif + tcp_ao_destroy_sock(sk, true); } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fa3a630f3629..de546313a214 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3997,7 +3997,7 @@ int tcp_connect(struct sock *sk) * then free up ao_info if allocated. */ if (needs_md5) { - tcp_ao_destroy_sock(sk); + tcp_ao_destroy_sock(sk, false); } else if (needs_ao) { tcp_clear_md5_list(sk); kfree(rcu_replace_pointer(tp->md5sig_info, NULL, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 45b81e230438..e6c3617976f0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -778,13 +778,6 @@ clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } -#else /* CONFIG_TCP_MD5SIG */ -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int l3index) -{ - return NULL; -} #endif static void tcp_v6_init_req(struct request_sock *req, @@ -1134,39 +1127,81 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *md5_key, u8 tclass, + struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - struct tcp_key key = { - .md5_key = md5_key, - .type = md5_key ? 
TCP_KEY_MD5 : TCP_KEY_NONE, - }; - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, - tclass, label, priority, txhash, &key); + tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto out; + if (aoh) { + key.ao_key = tcp_ao_established_key(ao_info, + aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + /* rcv_next switches to our rcv_next */ + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), - tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); +#ifdef CONFIG_TCP_AO +out: +#endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - int l3index; + struct tcp_key key = {}; + +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key)) { + int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, + l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; + } +#endif /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -1182,8 +1217,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, + &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); } -- cgit v1.2.3 From 06b22ef29591f625ef877ae00d82192938e29e60 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:02 +0100 Subject: net/tcp: Wire TCP-AO to request sockets Now when the new request socket is created from the listening socket, it's recorded what MKT was used by the peer. tcp_rsk_used_ao() is a new helper for checking if TCP-AO option was used to create the request socket. tcp_ao_copy_all_matching() will copy all keys that match the peer on the request socket, as well as preparing them for the usage (creating traffic keys). Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 18 ++++ include/net/tcp.h | 6 ++ include/net/tcp_ao.h | 24 +++++ net/ipv4/syncookies.c | 2 + net/ipv4/tcp_ao.c | 264 ++++++++++++++++++++++++++++++++++++++++++++--- net/ipv4/tcp_input.c | 15 +++ net/ipv4/tcp_ipv4.c | 66 ++++++++++-- net/ipv4/tcp_minisocks.c | 10 ++ net/ipv4/tcp_output.c | 37 ++++--- net/ipv6/syncookies.c | 2 + net/ipv6/tcp_ao.c | 38 ++++++- net/ipv6/tcp_ipv6.c | 75 +++++++++++--- 12 files changed, 506 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eec6e7e5312e..ec4e9367f5b0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -166,6 +166,11 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; +#ifdef CONFIG_TCP_AO + u8 ao_keyid; + u8 ao_rcv_next; + u8 maclen; +#endif }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -173,6 +178,19 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +static inline bool tcp_rsk_used_ao(const struct request_sock *req) +{ + /* The real length of MAC is saved in the request socket, + * signing anything with zero-length makes no sense, so here is + * a little hack.. + */ +#ifndef CONFIG_TCP_AO + return false; +#else + return tcp_rsk(req)->maclen != 0; +#endif +} + #define TCP_RMEM_TO_WIN_SCALE 8 struct tcp_sock { diff --git a/include/net/tcp.h b/include/net/tcp.h index 17b3ecd2760f..d0bea102b523 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2216,6 +2216,12 @@ struct tcp_request_sock_ops { const struct sock *sk, const struct sk_buff *skb); #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); + int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk); +#endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 971d7edcda9c..d2c1ee8bf7b0 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -123,6 +123,9 @@ int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, int sndid, int rcvid); +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family); int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk, bool twsk); @@ -147,6 +150,11 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, const struct sock *sk, const struct sk_buff *skb, const u8 *tkey, int hash_offset, u32 sne); @@ -154,11 +162,21 @@ int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, int nbytes); +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, __be32 sisn, __be32 disn); int 
tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, + const struct in6_addr *addr, + int sndid, int rcvid); struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, const struct sock *sk, const struct sk_buff *skb, const u8 *tkey, int hash_offset, u32 sne); @@ -178,6 +196,12 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, return 0; } +static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family) +{ +} + static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c64334363230..0681d3e82b11 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -400,6 +400,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->tfo_listener = false; + tcp_ao_syncookie(sk, skb, treq, AF_INET); + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7c4e2f42845a..68d81704e14e 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -169,6 +169,23 @@ static void tcp_ao_link_mkt(struct tcp_ao_info *ao, struct tcp_ao_key *mkt) hlist_add_head_rcu(&mkt->node, &ao->head); } +static struct tcp_ao_key *tcp_ao_copy_key(struct sock *sk, + struct tcp_ao_key *key) +{ + struct tcp_ao_key *new_key; + + new_key = sock_kmalloc(sk, tcp_ao_sizeof_key(key), + GFP_ATOMIC); + if (!new_key) + return NULL; + + *new_key = *key; + INIT_HLIST_NODE(&new_key->node); + tcp_sigpool_get(new_key->tcp_sigpool_id); + + return new_key; +} + static void tcp_ao_key_free_rcu(struct rcu_head *head) { struct tcp_ao_key *key = container_of(head, struct tcp_ao_key, rcu); @@ -290,6 +307,42 @@ static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, return -EOPNOTSUPP; } +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v4_ao_calc_key(mkt, key, + ireq->ir_loc_addr, ireq->ir_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +static int tcp_v4_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v4_ao_calc_key(mkt, key, iph->saddr, iph->daddr, + th->source, th->dest, sisn, disn); +} + +static int tcp_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn, int family) +{ + if (family == AF_INET) + return tcp_v4_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#endif + return -EAFNOSUPPORT; +} + static int tcp_v4_ao_hash_pseudoheader(struct tcp_sigpool *hp, __be32 daddr, __be32 saddr, int nbytes) @@ -515,6 +568,16 @@ int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key 
*key, tkey, hash_offset, sne); } +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + union tcp_ao_addr *addr = + (union tcp_ao_addr *)&inet_rsk(req)->ir_rmt_addr; + + return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); +} + struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid) { @@ -528,7 +591,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, struct tcp_ao_key **key, char **traffic_key, bool *allocated_traffic_key, u8 *keyid, u32 *sne) { - struct tcp_ao_key *rnext_key; + const struct tcphdr *th = tcp_hdr(skb); struct tcp_ao_info *ao_info; *allocated_traffic_key = false; @@ -543,23 +606,62 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, return -ENOTCONN; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { - return -1; + unsigned int family = READ_ONCE(sk->sk_family); + union tcp_ao_addr *addr; + __be32 disn, sisn; - if (sk->sk_state == TCP_TIME_WAIT) - ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); - else + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + *sne = 0; + } else { + sisn = th->seq; + disn = 0; + } + if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + addr = (union tcp_md5_addr *)&ipv6_hdr(skb)->saddr; + else + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + family = AF_INET; +#endif + + sk = sk_const_to_full_sk(sk); ao_info = rcu_dereference(tcp_sk(sk)->ao_info); - if (!ao_info) - return -ENOENT; + if (!ao_info) + return -ENOENT; + *key = tcp_ao_do_lookup(sk, addr, family, -1, aoh->rnext_keyid); + if (!*key) + return -ENOENT; + *traffic_key = kmalloc(tcp_ao_digest_size(*key), GFP_ATOMIC); + if (!*traffic_key) + return -ENOMEM; + *allocated_traffic_key = true; + if (tcp_ao_calc_key_skb(*key, *traffic_key, skb, + sisn, disn, family)) + return -1; + *keyid = (*key)->rcvid; + } else { + struct tcp_ao_key *rnext_key; - *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); - if (!*key) - return -ENOENT; - *traffic_key = snd_other_key(*key); - rnext_key = READ_ONCE(ao_info->rnext_key); - *keyid = rnext_key->rcvid; - *sne = 0; + if (sk->sk_state == TCP_TIME_WAIT) + ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); + else + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao_info) + return -ENOENT; + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + if (!*key) + return -ENOENT; + *traffic_key = snd_other_key(*key); + rnext_key = READ_ONCE(ao_info->rnext_key); + *keyid = rnext_key->rcvid; + *sne = 0; + } return 0; } @@ -597,6 +699,46 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, return 0; } +static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, + const struct sock *sk, const struct sk_buff *skb, + int sndid, int rcvid) +{ + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)&iph->saddr, + AF_INET, sndid, rcvid); + } else { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)&iph->saddr, + AF_INET6, sndid, rcvid); + } +} + +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct 
tcp_ao_hdr *aoh; + struct tcp_ao_key *key; + + treq->maclen = 0; + + if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) + return; + + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid); + if (!key) + /* Key not found, continue without TCP-AO */ + return; + + treq->ao_rcv_next = aoh->keyid; + treq->ao_keyid = aoh->rnext_keyid; + treq->maclen = tcp_ao_maclen(key); +} + static int tcp_ao_cache_traffic_keys(const struct sock *sk, struct tcp_ao_info *ao, struct tcp_ao_key *ao_key) @@ -704,6 +846,100 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_ao_cache_traffic_keys(sk, ao, key); } +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family) +{ + struct tcp_ao_key *key, *new_key, *first_key; + struct tcp_ao_info *new_ao, *ao; + struct hlist_node *key_head; + union tcp_ao_addr *addr; + bool match = false; + int ret = -ENOMEM; + + ao = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao) + return 0; + + /* New socket without TCP-AO on it */ + if (!tcp_rsk_used_ao(req)) + return 0; + + new_ao = tcp_ao_alloc_info(GFP_ATOMIC); + if (!new_ao) + return -ENOMEM; + new_ao->lisn = htonl(tcp_rsk(req)->snt_isn); + new_ao->risn = htonl(tcp_rsk(req)->rcv_isn); + new_ao->ao_required = ao->ao_required; + + if (family == AF_INET) { + addr = (union tcp_ao_addr *)&newsk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + addr = (union tcp_ao_addr *)&newsk->sk_v6_daddr; +#endif + } else { + ret = -EAFNOSUPPORT; + goto free_ao; + } + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if (tcp_ao_key_cmp(key, addr, key->prefixlen, family, -1, -1)) + continue; + + new_key = tcp_ao_copy_key(newsk, key); + if (!new_key) + goto free_and_exit; + + tcp_ao_cache_traffic_keys(newsk, new_ao, new_key); + tcp_ao_link_mkt(new_ao, new_key); + match = true; + } + + if (!match) { + /* RFC5925 (7.4.1) specifies that the TCP-AO status + * of a connection is determined on the initial SYN. + * At this point the connection was TCP-AO enabled, so + * it can't switch to being unsigned if peer's key + * disappears on the listening socket. 
+ */ + ret = -EKEYREJECTED; + goto free_and_exit; + } + + key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); + first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); + + key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1); + if (key) + new_ao->current_key = key; + else + new_ao->current_key = first_key; + + /* set rnext_key */ + key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next); + if (key) + new_ao->rnext_key = key; + else + new_ao->rnext_key = first_key; + + sk_gso_disable(newsk); + rcu_assign_pointer(tcp_sk(newsk)->ao_info, new_ao); + + return 0; + +free_and_exit: + hlist_for_each_entry_safe(key, key_head, &new_ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + atomic_sub(tcp_ao_sizeof_key(key), &newsk->sk_omem_alloc); + kfree_sensitive(key); + } +free_ao: + kfree(new_ao); + return ret; +} + static bool tcp_ao_can_set_current_rnext(struct sock *sk) { /* There aren't current/rnext keys on TCP_LISTEN sockets */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fc42b172abf6..de64c0c2fb69 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7045,6 +7045,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct flowi fl; u8 syncookies; +#ifdef CONFIG_TCP_AO + const struct tcp_ao_hdr *aoh; +#endif + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without @@ -7131,6 +7135,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, inet_rsk(req)->ecn_ok = 0; } +#ifdef CONFIG_TCP_AO + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto drop_and_release; /* Invalid TCP options */ + if (aoh) { + tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr); + tcp_rsk(req)->ao_rcv_next = aoh->keyid; + tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + } else { + tcp_rsk(req)->maclen = 0; + } +#endif tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a78112d78d06..2bd56b57f3c9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1072,13 +1072,47 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; - /* RFC 7323 2.3 - * The window field (SEG.WND) of every outgoing segment, with the - * exception of segments, MUST be right-shifted by - * Rcv.Wind.Shift bits: - */ +#ifdef CONFIG_TCP_AO + if (tcp_rsk_used_ao(req)) { + const union tcp_md5_addr *addr; + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + key.ao_key = tcp_ao_do_lookup(sk, addr, AF_INET, + aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, addr, AF_INET, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. 
+ */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI4, %d)->(%pI4, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ip_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; + + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v4_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { const union tcp_md5_addr *addr; int l3index; @@ -1087,8 +1121,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); if (key.md5_key) key.type = TCP_KEY_MD5; - } #endif + } + + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, @@ -1098,6 +1138,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos, READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } /* @@ -1636,6 +1678,10 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v4_ao_lookup_rsk, + .ao_calc_key = tcp_v4_ao_calc_key_rsk, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, #endif @@ -1737,12 +1783,16 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Copy over the MD5 key from the original socket */ addr = (union tcp_md5_addr *)&newinet->inet_daddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - if (key) { + if (key && !tcp_rsk_used_ao(req)) { if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key)) goto put_and_exit; sk_gso_disable(newsk); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET)) + goto put_and_exit; /* OOM, release back memory */ +#endif if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6810cf65a322..8d941a6e066b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -506,6 +506,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key; +#endif if (!newsk) return NULL; @@ -594,6 +597,13 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ #endif +#ifdef CONFIG_TCP_AO + newtp->ao_info = NULL; + ao_key = treq->af_specific->ao_lookup(sk, req, + tcp_rsk(req)->ao_keyid, -1); + if (ao_key) + newtp->tcp_header_len += tcp_ao_len(ao_key); + #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index de546313a214..d44ef88025fb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -615,6 +615,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct 
sk_buff *skb, * (but it may well be that other scenarios fail similarly). */ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, + const struct tcp_request_sock *tcprsk, struct tcp_out_options *opts, struct tcp_key *key) { @@ -629,20 +630,28 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, ptr += 4; } else if (tcp_key_is_ao(key)) { #ifdef CONFIG_TCP_AO - struct tcp_ao_key *rnext_key; - struct tcp_ao_info *ao_info; - u8 maclen; + u8 maclen = tcp_ao_maclen(key->ao_key); - ao_info = rcu_dereference_check(tp->ao_info, + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); - rnext_key = READ_ONCE(ao_info->rnext_key); - if (WARN_ON_ONCE(!rnext_key)) - goto out_ao; - maclen = tcp_ao_maclen(key->ao_key); - *ptr++ = htonl((TCPOPT_AO << 24) | - (tcp_ao_len(key->ao_key) << 16) | - (key->ao_key->sndid << 8) | - (rnext_key->rcvid)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + goto out_ao; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } opts->hash_location = (__u8 *)ptr; ptr += maclen / sizeof(*ptr); if (unlikely(maclen % sizeof(*ptr))) { @@ -1386,7 +1395,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->window = htons(min(tp->rcv_wnd, 65535U)); } - tcp_options_write(th, tp, &opts, &key); + tcp_options_write(th, tp, NULL, &opts, &key); if (tcp_key_is_md5(&key)) { #ifdef CONFIG_TCP_MD5SIG @@ -3729,7 +3738,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. 
*/ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write(th, NULL, &opts, &key); + tcp_options_write(th, NULL, NULL, &opts, &key); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5014aa663452..ad7a8caa7b2a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -214,6 +214,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_isn = cookie; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); + tcp_ao_syncookie(sk, skb, treq, AF_INET6); + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index d08735b6f3c5..c9a6fa84f6ce 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -49,6 +49,17 @@ static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, return err; } +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, &iph->daddr, + th->source, th->dest, sisn, disn); +} + int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send) @@ -63,9 +74,21 @@ int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, htons(sk->sk_num), disn, sisn); } -static struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int sndid, int rcvid) +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v6_ao_calc_key(mkt, key, + &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, + const struct in6_addr *addr, + int sndid, int rcvid) { return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)addr, AF_INET6, sndid, rcvid); @@ -80,6 +103,15 @@ struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); } +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct in6_addr *addr = &inet_rsk(req)->ir_v6_rmt_addr; + + return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); +} + int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, int nbytes) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e6c3617976f0..b08b177847da 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -836,6 +836,10 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup_rsk, + .ao_calc_key = tcp_v6_ao_calc_key_rsk, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -1192,16 +1196,54 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, { struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + if (tcp_rsk_used_ao(req)) { + const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; + const struct tcp_ao_hdr *aoh; + int l3index; + + l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + key.ao_key = tcp_v6_ao_do_lookup(sk, addr, aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_v6_ao_do_lookup(sk, addr, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. + */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; + + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; - } #endif + } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -1220,6 +1262,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } @@ -1449,19 +1493,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - /* Copy over the MD5 key from the original socket */ - key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); - if (key) { - const union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; + if (!tcp_rsk_used_ao(req)) { + /* Copy over the MD5 key from the original socket */ + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); + if (key) { + const union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto out; + } } } #endif +#ifdef CONFIG_TCP_AO + /* Copy over tcp_ao_info if any */ + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) + goto out; /* OOM */ +#endif if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); -- cgit v1.2.3 From 9427c6aa3ec92f66b3d38f5d5f7af6b94b648a66 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:03 +0100 Subject: net/tcp: Sign SYN-ACK segments with TCP-AO Similarly to RST segments, wire SYN-ACKs to TCP-AO. tcp_rsk_used_ao() is handy here to check if the request socket used AO and needs a signature on the outgoing segments. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 3 +++ include/net/tcp_ao.h | 6 +++++ net/ipv4/tcp_ao.c | 22 ++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 72 +++++++++++++++++++++++++++++++++++++++------------ net/ipv6/tcp_ao.c | 22 ++++++++++++++++ net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 111 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d0bea102b523..c7f80cc94d71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2221,6 +2221,9 @@ struct tcp_request_sock_ops { struct request_sock *req, int sndid, int rcvid); int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk); + int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); #endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index d2c1ee8bf7b0..1d69978e349a 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -147,6 +147,9 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, const struct sock *sk, __be32 sisn, __be32 disn, bool send); @@ -181,6 +184,9 @@ int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, const struct sock *sk, const struct sk_buff *skb, const u8 *tkey, int hash_offset, u32 sne); int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); void tcp_ao_established(struct sock *sk); void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); void tcp_ao_connect_init(struct sock *sk); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 68d81704e14e..de3710758d55 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -568,6 +568,28 @@ int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, tkey, hash_offset, sne); } +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v4_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} + struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, struct request_sock *req, int sndid, int rcvid) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2bd56b57f3c9..bdf0224ae827 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1681,6 +1681,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v4_ao_lookup_rsk, .ao_calc_key = tcp_v4_ao_calc_key_rsk, + .ao_synack_hash = tcp_v4_ao_synack_hash, #endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 
d44ef88025fb..f558c054cf6e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -886,7 +886,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, + const struct tcp_key *key, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) @@ -894,8 +894,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; -#ifdef CONFIG_TCP_MD5SIG - if (md5) { + if (tcp_key_is_md5(key)) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -906,8 +905,11 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ if (synack_type != TCP_SYNACK_COOKIE) ireq->tstamp_ok &= !ireq->sack_ok; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len(key->ao_key); + ireq->tstamp_ok &= !ireq->sack_ok; } -#endif /* We always send an MSS option. */ opts->mss = mss; @@ -3653,7 +3655,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_key *md5 = NULL; struct tcp_out_options opts; struct tcp_key key = {}; struct sk_buff *skb; @@ -3707,18 +3708,48 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); - if (md5) - key.type = TCP_KEY_MD5; #endif + if (tcp_rsk_used_ao(req)) { +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key = NULL; + u8 maclen = tcp_rsk(req)->maclen; + u8 keyid = tcp_rsk(req)->ao_keyid; + + ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), + keyid, -1); + /* If there is no matching key - avoid sending anything, + * especially usigned segments. It could try harder and lookup + * for another peer-matching key, but the peer has requested + * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. + */ + if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) { + u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0; + + rcu_read_unlock(); + kfree_skb(skb); + net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n", + keyid, maclen, key_maclen); + return NULL; + } + key.ao_key = ao_key; + key.type = TCP_KEY_AO; +#endif + } else { +#ifdef CONFIG_TCP_MD5SIG + key.md5_key = tcp_rsk(req)->af_specific->req_md5_lookup(sk, + req_to_sk(req)); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4); /* bpf program will be interested in the tcp_flags */ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK; - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc, synack_type, - syn_skb) + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, + &key, foc, synack_type, syn_skb) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -3738,15 +3769,24 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. 
*/ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write(th, NULL, NULL, &opts, &key); + tcp_options_write(th, NULL, tcp_rsk(req), &opts, &key); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); -#ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) + if (tcp_key_is_md5(&key)) { +#ifdef CONFIG_TCP_MD5SIG tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, req_to_sk(req), skb); + key.md5_key, req_to_sk(req), skb); +#endif + } else if (tcp_key_is_ao(&key)) { +#ifdef CONFIG_TCP_AO + tcp_rsk(req)->af_specific->ao_synack_hash(opts.hash_location, + key.ao_key, req, skb, + opts.hash_location - (u8 *)th, 0); +#endif + } +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_unlock(); #endif diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index c9a6fa84f6ce..99753e12c08c 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -144,3 +144,25 @@ int tcp_v6_parse_ao(struct sock *sk, int cmd, { return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen); } + +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b08b177847da..8c5c96187a72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -839,6 +839,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup_rsk, .ao_calc_key = tcp_v6_ao_calc_key_rsk, + .ao_synack_hash = tcp_v6_ao_synack_hash, #endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, -- cgit v1.2.3 From 0a3a809089eb1d4a0a2fd0c16b520d603988c859 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:04 +0100 Subject: net/tcp: Verify inbound TCP-AO signed segments Now there is a common function to verify signature on TCP segments: tcp_inbound_hash(). It has checks for all possible cross-interactions with MD5 signs as well as with unsigned segments. The rules from RFC5925 are: (1) Any TCP segment can have at max only one signature. (2) TCP connections can't switch between using TCP-MD5 and TCP-AO. (3) TCP-AO connections can't stop using AO, as well as unsigned connections can't suddenly start using AO. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/dropreason-core.h | 17 +++++ include/net/tcp.h | 53 +++++++++++++++- include/net/tcp_ao.h | 14 +++++ net/ipv4/tcp.c | 39 +++--------- net/ipv4/tcp_ao.c | 142 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 10 +-- net/ipv6/tcp_ao.c | 9 +-- net/ipv6/tcp_ipv6.c | 11 ++-- 8 files changed, 248 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 3af4464a9c5b..7637137ae33e 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -24,6 +24,10 @@ FN(TCP_MD5NOTFOUND) \ FN(TCP_MD5UNEXPECTED) \ FN(TCP_MD5FAILURE) \ + FN(TCP_AONOTFOUND) \ + FN(TCP_AOUNEXPECTED) \ + FN(TCP_AOKEYNOTFOUND) \ + FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ FN(TCP_ZEROWINDOW) \ @@ -163,6 +167,19 @@ enum skb_drop_reason { * to LINUX_MIB_TCPMD5FAILURE */ SKB_DROP_REASON_TCP_MD5FAILURE, + /** + * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected + */ + SKB_DROP_REASON_TCP_AONOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it + * was not expected. + */ + SKB_DROP_REASON_TCP_AOUNEXPECTED, + /** @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown */ + SKB_DROP_REASON_TCP_AOKEYNOTFOUND, + /** @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong */ + SKB_DROP_REASON_TCP_AOFAILURE, /** * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( * see LINUX_MIB_TCPBACKLOGDROP) diff --git a/include/net/tcp.h b/include/net/tcp.h index c7f80cc94d71..8e1f835bad22 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1809,7 +1809,7 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif); + int family, int l3index, const __u8 *hash_location); #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) @@ -1831,7 +1831,7 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { return SKB_NOT_DROPPED_YET; } @@ -2730,4 +2730,53 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; } +/* Called with rcu_read_lock() */ +static inline enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) + return SKB_DROP_REASON_TCP_AUTH_HDR; + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) + return SKB_DROP_REASON_TCP_AOFAILURE; + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. 
+ */ + if (tcp_ao_required(sk, saddr, family)) + return SKB_DROP_REASON_TCP_AONOTFOUND; + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} + #endif /* _TCP_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 1d69978e349a..1c7c0a5d1877 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -111,6 +111,9 @@ struct tcp6_ao_context { }; struct tcp_sigpool; +#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \ + TCPF_CLOSE | TCPF_CLOSE_WAIT | \ + TCPF_LAST_ACK | TCPF_CLOSING) int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_ao_key *key, struct tcphdr *th, @@ -130,6 +133,10 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk, bool twsk); void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); +enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, + const struct tcp_ao_hdr *aoh); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); @@ -208,6 +215,13 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, { } +static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, const struct tcp_ao_hdr *aoh) +{ + return SKB_NOT_DROPPED_YET; +} + static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 369e2a41bc1b..eb71212a09d8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4375,42 +4375,23 @@ EXPORT_SYMBOL(tcp_md5_hash_key); enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { - /* - * This gets called for each TCP segment that arrives - * so we want to be efficient. + /* This gets called for each TCP segment that has TCP-MD5 option. * We have 3 drop cases: * o No MD5 hash and one expected. * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. */ - const __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; const struct tcphdr *th = tcp_hdr(skb); const struct tcp_sock *tp = tcp_sk(sk); - int genhash, l3index; + struct tcp_md5sig_key *key; u8 newhash[16]; + int genhash; - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family); - if (tcp_parse_auth_options(th, &hash_location, NULL)) - return SKB_DROP_REASON_TCP_AUTH_HDR; - - /* We've parsed the options - do we have a hash? 
*/ - if (!hash_expected && !hash_location) - return SKB_NOT_DROPPED_YET; + key = tcp_md5_do_lookup(sk, l3index, saddr, family); - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - - if (!hash_expected && hash_location) { + if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4420,14 +4401,10 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, * IPv4-mapped case. */ if (family == AF_INET) - genhash = tcp_v4_md5_hash_skb(newhash, - hash_expected, - NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else - genhash = tp->af_specific->calc_md5_hash(newhash, - hash_expected, + genhash = tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); if (family == AF_INET) { diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index de3710758d55..6c5815713b73 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -761,6 +761,148 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, treq->maclen = tcp_ao_maclen(key); } +static enum skb_drop_reason +tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, + unsigned short int family, struct tcp_ao_info *info, + const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, + u8 *traffic_key, u8 *phash, u32 sne) +{ + u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); + const struct tcphdr *th = tcp_hdr(skb); + void *hash_buf = NULL; + + if (maclen != tcp_ao_maclen(key)) + return SKB_DROP_REASON_TCP_AOFAILURE; + + hash_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!hash_buf) + return SKB_DROP_REASON_NOT_SPECIFIED; + + /* XXX: make it per-AF callback? */ + tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, + (phash - (u8 *)th), sne); + if (memcmp(phash, hash_buf, maclen)) { + kfree(hash_buf); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + kfree(hash_buf); + return SKB_NOT_DROPPED_YET; +} + +enum skb_drop_reason +tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, + unsigned short int family, const struct request_sock *req, + const struct tcp_ao_hdr *aoh) +{ + const struct tcphdr *th = tcp_hdr(skb); + u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ + struct tcp_ao_info *info; + enum skb_drop_reason ret; + struct tcp_ao_key *key; + __be32 sisn, disn; + u8 *traffic_key; + u32 sne = 0; + + info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!info) + return SKB_DROP_REASON_TCP_AOUNEXPECTED; + + if (unlikely(th->syn)) { + sisn = th->seq; + disn = 0; + } + + /* Fast-path */ + if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) { + enum skb_drop_reason err; + struct tcp_ao_key *current_key; + + /* Check if this socket's rnext_key matches the keyid in the + * packet. If not we lookup the key based on the keyid + * matching the rcvid in the mkt. 
+ */ + key = READ_ONCE(info->rnext_key); + if (key->rcvid != aoh->keyid) { + key = tcp_ao_established_key(info, -1, aoh->keyid); + if (!key) + goto key_not_found; + } + + /* Delayed retransmitted SYN */ + if (unlikely(th->syn && !th->ack)) + goto verify_hash; + + sne = 0; + /* Established socket, traffic key are cached */ + traffic_key = rcv_other_key(key); + err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne); + if (err) + return err; + current_key = READ_ONCE(info->current_key); + /* Key rotation: the peer asks us to use new key (RNext) */ + if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + /* If the key is not found we do nothing. */ + key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); + if (key) + /* pairs with tcp_ao_del_cmd */ + WRITE_ONCE(info->current_key, key); + } + return SKB_NOT_DROPPED_YET; + } + + /* Lookup key based on peer address and keyid. + * current_key and rnext_key must not be used on tcp listen + * sockets as otherwise: + * - request sockets would race on those key pointers + * - tcp_ao_del_cmd() allows async key removal + */ + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid); + if (!key) + goto key_not_found; + + if (th->syn && !th->ack) + goto verify_hash; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + /* Make the initial syn the likely case here */ + if (unlikely(req)) { + sne = 0; + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + } else if (unlikely(th->ack && !th->syn)) { + /* Possible syncookie packet */ + sisn = htonl(ntohl(th->seq) - 1); + disn = htonl(ntohl(th->ack_seq) - 1); + sne = 0; + } else if (unlikely(!th->syn)) { + /* no way to figure out initial sisn/disn - drop */ + return SKB_DROP_REASON_TCP_FLAGS; + } + } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + disn = info->lisn; + if (th->syn || th->rst) + sisn = th->seq; + else + sisn = info->risn; + } else { + WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state); + return SKB_DROP_REASON_TCP_AOFAILURE; + } +verify_hash: + traffic_key = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!traffic_key) + return SKB_DROP_REASON_NOT_SPECIFIED; + tcp_ao_calc_key_skb(key, traffic_key, skb, sisn, disn, family); + ret = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne); + kfree(traffic_key); + return ret; + +key_not_found: + return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; +} + static int tcp_ao_cache_traffic_keys(const struct sock *sk, struct tcp_ao_info *ao, struct tcp_ao_key *ao_key) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bdf0224ae827..f39ccefa78dc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2204,9 +2204,9 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &iph->saddr, &iph->daddr, - AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (unlikely(drop_reason)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -2283,8 +2283,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, - &iph->daddr, AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (drop_reason) goto discard_and_relse; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index 99753e12c08c..8b04611c9078 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -53,11 
+53,12 @@ int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, const struct sk_buff *skb, __be32 sisn, __be32 disn) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); - return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, &iph->daddr, - th->source, th->dest, sisn, disn); + return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, + &iph->daddr, th->source, + th->dest, sisn, disn); } int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8c5c96187a72..d2724383d263 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1785,9 +1785,9 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1861,8 +1861,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -2089,6 +2089,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .ao_lookup = tcp_v6_ao_lookup, .calc_ao_hash = tcp_v4_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, #endif }; #endif -- cgit v1.2.3 From af09a341dcf63b34ce742295ad1ce876246c5de2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:05 +0100 Subject: net/tcp: Add TCP-AO segments counters Introduce segment counters that are useful for troubleshooting/debugging as well as for writing tests. Now there are global snmp counters as well as per-socket and per-key. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 15 +++++++++++---- include/net/tcp.h | 15 +++++++++++---- include/net/tcp_ao.h | 10 ++++++++++ include/uapi/linux/snmp.h | 4 ++++ include/uapi/linux/tcp.h | 8 +++++++- net/ipv4/proc.c | 4 ++++ net/ipv4/tcp_ao.c | 30 +++++++++++++++++++++++++++--- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 9 files changed, 77 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 7637137ae33e..3c70ad53a49c 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -168,17 +168,24 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_MD5FAILURE, /** - * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected + * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected, + * corresponding to LINUX_MIB_TCPAOREQUIRED */ SKB_DROP_REASON_TCP_AONOTFOUND, /** * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it - * was not expected. 
+ * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND */ SKB_DROP_REASON_TCP_AOUNEXPECTED, - /** @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown */ + /** + * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown, + * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ SKB_DROP_REASON_TCP_AOKEYNOTFOUND, - /** @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong */ + /** + * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong, + * corresponding to LINUX_MIB_TCPAOBAD + */ SKB_DROP_REASON_TCP_AOFAILURE, /** * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( diff --git a/include/net/tcp.h b/include/net/tcp.h index 8e1f835bad22..50ae1ed244e5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2712,7 +2712,7 @@ static inline int tcp_parse_auth_options(const struct tcphdr *th, } static inline bool tcp_ao_required(struct sock *sk, const void *saddr, - int family) + int family, bool stat_inc) { #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -2724,8 +2724,13 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; ao_key = tcp_ao_do_lookup(sk, saddr, family, -1, -1); - if (ao_info->ao_required || ao_key) + if (ao_info->ao_required || ao_key) { + if (stat_inc) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); + atomic64_inc(&ao_info->counters.ao_required); + } return true; + } #endif return false; } @@ -2747,8 +2752,10 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, return SKB_DROP_REASON_TCP_AUTH_HDR; if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); return SKB_DROP_REASON_TCP_AOFAILURE; + } } /* sdif set, means packet ingressed via a device @@ -2763,7 +2770,7 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * the last key is impossible to remove, so there's * always at least one current_key. 
*/ - if (tcp_ao_required(sk, saddr, family)) + if (tcp_ao_required(sk, saddr, family, true)) return SKB_DROP_REASON_TCP_AONOTFOUND; if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 1c7c0a5d1877..cfb55bd9411b 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -19,6 +19,13 @@ struct tcp_ao_hdr { u8 rnext_keyid; }; +struct tcp_ao_counters { + atomic64_t pkt_good; + atomic64_t pkt_bad; + atomic64_t key_not_found; + atomic64_t ao_required; +}; + struct tcp_ao_key { struct hlist_node node; union tcp_ao_addr addr; @@ -33,6 +40,8 @@ struct tcp_ao_key { u8 rcvid; u8 maclen; struct rcu_head rcu; + atomic64_t pkt_good; + atomic64_t pkt_bad; u8 traffic_keys[]; }; @@ -81,6 +90,7 @@ struct tcp_ao_info { */ struct tcp_ao_key *current_key; struct tcp_ao_key *rnext_key; + struct tcp_ao_counters counters; u32 ao_required :1, __unused :31; __be32 lisn; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b2b72886cb6d..3d5ea841bffe 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -297,6 +297,10 @@ enum LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */ + LINUX_MIB_TCPAOREQUIRED, /* TCPAORequired */ + LINUX_MIB_TCPAOBAD, /* TCPAOBad */ + LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */ + LINUX_MIB_TCPAOGOOD, /* TCPAOGood */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fa49f03e62fe..9c48964849d1 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -404,9 +404,15 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */ __u32 set_current :1, /* corresponding ::current_key */ set_rnext :1, /* corresponding ::rnext */ ao_required :1, /* don't accept non-AO connects */ - reserved :29; /* must be 0 */ + set_counters :1, /* set/clear ::pkt_* counters */ + reserved :28; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ __u8 current_key; /* KeyID to set as Current_key */ __u8 rnext; /* KeyID to set as Rnext_key */ + __u64 pkt_good; /* verified segments */ + __u64 pkt_bad; /* failed verification */ + __u64 pkt_key_not_found; /* could not find a key to verify */ + __u64 pkt_ao_required; /* segments missing TCP-AO sign */ } __attribute__((aligned(8))); /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) 
*/ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a85b0aba3646..f5b37ebc18c0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -299,6 +299,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH), + SNMP_MIB_ITEM("TCPAORequired", LINUX_MIB_TCPAOREQUIRED), + SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD), + SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND), + SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 6c5815713b73..1097e99a9ad6 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -182,6 +182,8 @@ static struct tcp_ao_key *tcp_ao_copy_key(struct sock *sk, *new_key = *key; INIT_HLIST_NODE(&new_key->node); tcp_sigpool_get(new_key->tcp_sigpool_id); + atomic64_set(&new_key->pkt_good, 0); + atomic64_set(&new_key->pkt_bad, 0); return new_key; } @@ -771,8 +773,12 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, const struct tcphdr *th = tcp_hdr(skb); void *hash_buf = NULL; - if (maclen != tcp_ao_maclen(key)) + if (maclen != tcp_ao_maclen(key)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); return SKB_DROP_REASON_TCP_AOFAILURE; + } hash_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); if (!hash_buf) @@ -782,9 +788,15 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, (phash - (u8 *)th), sne); if (memcmp(phash, hash_buf, maclen)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOGOOD); + atomic64_inc(&info->counters.pkt_good); + atomic64_inc(&key->pkt_good); kfree(hash_buf); return SKB_NOT_DROPPED_YET; } @@ -804,8 +816,10 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, u32 sne = 0; info = rcu_dereference(tcp_sk(sk)->ao_info); - if (!info) + if (!info) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); return SKB_DROP_REASON_TCP_AOUNEXPECTED; + } if (unlikely(th->syn)) { sisn = th->seq; @@ -900,6 +914,8 @@ verify_hash: return ret; key_not_found: + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + atomic64_inc(&info->counters.key_not_found); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } @@ -1483,6 +1499,8 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, key->keyflags = cmd.keyflags; key->sndid = cmd.sndid; key->rcvid = cmd.rcvid; + atomic64_set(&key->pkt_good, 0); + atomic64_set(&key->pkt_bad, 0); ret = tcp_ao_parse_crypto(&cmd, key); if (ret < 0) @@ -1699,7 +1717,7 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, return -EINVAL; } - if (cmd.reserved != 0) + if (cmd.reserved != 0 || cmd.reserved2 != 0) return -EINVAL; ao_info = setsockopt_ao_info(sk); @@ -1734,6 +1752,12 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, goto out; } } + if (cmd.set_counters) { + atomic64_set(&ao_info->counters.pkt_good, cmd.pkt_good); + atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad); + atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found); + atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required); + } ao_info->ao_required = 
cmd.ao_required; if (new_current) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f39ccefa78dc..ece95d5138e1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1531,7 +1531,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET)) + if (tcp_ao_required(sk, addr, AF_INET, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d2724383d263..cc899caf348e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -661,7 +661,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET)) + if (tcp_ao_required(sk, addr, AF_INET, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, @@ -673,7 +673,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET6)) + if (tcp_ao_required(sk, addr, AF_INET6, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, -- cgit v1.2.3 From 64382c71a5575741933dfdb0cf7162c6e9b8854e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:06 +0100 Subject: net/tcp: Add TCP-AO SNE support Add Sequence Number Extension (SNE) for TCP-AO. This is needed to protect long-living TCP-AO connections from replaying attacks after sequence number roll-over, see RFC5925 (6.2). Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 22 +++++++++++++++++++++- net/ipv4/tcp_ao.c | 46 +++++++++++++++++++++++++++++++++++++--------- net/ipv4/tcp_input.c | 28 ++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 3 ++- net/ipv4/tcp_minisocks.c | 15 ++++++++++++++- net/ipv6/tcp_ipv6.c | 3 ++- 6 files changed, 104 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index cfb55bd9411b..0c3516d1b968 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -95,6 +95,25 @@ struct tcp_ao_info { __unused :31; __be32 lisn; __be32 risn; + /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ, + * that protect TCP-AO connection from replayed old TCP segments. + * See RFC5925 (6.2). + * In order to get correct SNE, there's a helper tcp_ao_compute_sne(). + * It needs SEQ basis to understand whereabouts are lower SEQ numbers. + * According to that basis vector, it can provide incremented SNE + * when SEQ rolls over or provide decremented SNE when there's + * a retransmitted segment from before-rolling over. + * - for request sockets such basis is rcv_isn/snt_isn, which seems + * good enough as it's unexpected to receive 4 Gbytes on reqsk. + * - for full sockets the basis is rcv_nxt/snd_una. snd_una is + * taken instead of snd_nxt as currently it's easier to track + * in tcp_snd_una_update(), rather than updating SNE in all + * WRITE_ONCE(tp->snd_nxt, ...) + * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt. 
+ * tw_snd_nxt is not expected to change, while tw_rcv_nxt may. + */ + u32 snd_sne; + u32 rcv_sne; refcount_t refcnt; /* Protects twsk destruction */ struct rcu_head rcu; }; @@ -147,6 +166,7 @@ enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, const struct tcp_ao_hdr *aoh); +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); @@ -156,7 +176,7 @@ int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, const union tcp_ao_addr *saddr, const struct tcphdr *th, u32 sne); int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, - const struct tcp_ao_hdr *aoh, int l3index, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, struct tcp_ao_key **key, char **traffic_key, bool *allocated_traffic_key, u8 *keyid, u32 *sne); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 1097e99a9ad6..7e14bcd4dfd4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -401,6 +401,21 @@ static int tcp_ao_hash_pseudoheader(unsigned short int family, return -EAFNOSUPPORT; } +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq) +{ + u32 sne = next_sne; + + if (before(seq, next_seq)) { + if (seq > next_seq) + sne--; + } else { + if (seq < next_seq) + sne++; + } + + return sne; +} + /* tcp_ao_hash_sne(struct tcp_sigpool *hp) * @hp - used for hashing * @sne - sne value @@ -611,7 +626,7 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, } int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, - const struct tcp_ao_hdr *aoh, int l3index, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, struct tcp_ao_key **key, char **traffic_key, bool *allocated_traffic_key, u8 *keyid, u32 *sne) { @@ -639,7 +654,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, sisn = htonl(tcp_rsk(req)->rcv_isn); disn = htonl(tcp_rsk(req)->snt_isn); - *sne = 0; + *sne = tcp_ao_compute_sne(0, tcp_rsk(req)->snt_isn, seq); } else { sisn = th->seq; disn = 0; @@ -670,11 +685,15 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, *keyid = (*key)->rcvid; } else { struct tcp_ao_key *rnext_key; + u32 snd_basis; - if (sk->sk_state == TCP_TIME_WAIT) + if (sk->sk_state == TCP_TIME_WAIT) { ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); - else + snd_basis = tcp_twsk(sk)->tw_snd_nxt; + } else { ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + snd_basis = tcp_sk(sk)->snd_una; + } if (!ao_info) return -ENOENT; @@ -684,7 +703,8 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, *traffic_key = snd_other_key(*key); rnext_key = READ_ONCE(ao_info->rnext_key); *keyid = rnext_key->rcvid; - *sne = 0; + *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), + snd_basis, seq); } return 0; } @@ -698,6 +718,7 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_ao_info *ao; void *tkey_buf = NULL; u8 *traffic_key; + u32 sne; ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk)); @@ -717,8 +738,10 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, tp->af_specific->ao_calc_key_sk(key, traffic_key, sk, ao->lisn, disn, true); } + sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una), + ntohl(th->seq)); tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key, - hash_location - (u8 *)th, 0); + hash_location - (u8 *)th, sne); 
kfree(tkey_buf); return 0; } @@ -846,7 +869,8 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, if (unlikely(th->syn && !th->ack)) goto verify_hash; - sne = 0; + sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt, + ntohl(th->seq)); /* Established socket, traffic key are cached */ traffic_key = rcv_other_key(key); err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, @@ -881,14 +905,16 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { /* Make the initial syn the likely case here */ if (unlikely(req)) { - sne = 0; + sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn, + ntohl(th->seq)); sisn = htonl(tcp_rsk(req)->rcv_isn); disn = htonl(tcp_rsk(req)->snt_isn); } else if (unlikely(th->ack && !th->syn)) { /* Possible syncookie packet */ sisn = htonl(ntohl(th->seq) - 1); disn = htonl(ntohl(th->ack_seq) - 1); - sne = 0; + sne = tcp_ao_compute_sne(0, ntohl(sisn), + ntohl(th->seq)); } else if (unlikely(!th->syn)) { /* no way to figure out initial sisn/disn - drop */ return SKB_DROP_REASON_TCP_FLAGS; @@ -986,6 +1012,7 @@ void tcp_ao_connect_init(struct sock *sk) tp->tcp_header_len += tcp_ao_len(key); ao_info->lisn = htonl(tp->write_seq); + ao_info->snd_sne = 0; } else { /* Can't happen: tcp_connect() verifies that there's * at least one tcp-ao key that matches the remote peer. @@ -1021,6 +1048,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) return; WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); + ao->rcv_sne = 0; hlist_for_each_entry_rcu(key, &ao->head, node) tcp_ao_cache_traffic_keys(sk, ao, key); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index de64c0c2fb69..67ca72185f14 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3576,9 +3576,18 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; +#endif sock_owned_by_me((struct sock *)tp); tp->bytes_acked += delta; +#ifdef CONFIG_TCP_AO + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && ack < tp->snd_una) + ao->snd_sne++; +#endif tp->snd_una = ack; } @@ -3586,9 +3595,18 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; +#endif sock_owned_by_me((struct sock *)tp); tp->bytes_received += delta; +#ifdef CONFIG_TCP_AO + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && seq < tp->rcv_nxt) + ao->rcv_sne++; +#endif WRITE_ONCE(tp->rcv_nxt, seq); } @@ -6456,6 +6474,16 @@ consume: * simultaneous connect with crossed SYNs. * Particularly, it can be connect to self. 
*/ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + WRITE_ONCE(ao->risn, th->seq); + ao->rcv_sne = 0; + } +#endif tcp_set_state(sk, TCP_SYN_RECV); if (tp->rx_opt.saw_tstamp) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ece95d5138e1..bdec99707028 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -676,7 +676,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, u8 keyid; rcu_read_lock(); - if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, ntohl(reply->seq), &key, &traffic_key, &allocated_traffic_key, &keyid, &ao_sne)) goto out; @@ -1034,6 +1034,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct tcp_ao_key *rnext_key; key.traffic_key = snd_other_key(key.ao_key); + key.sne = READ_ONCE(ao_info->snd_sne); rnext_key = READ_ONCE(ao_info->rnext_key); key.rcv_next = rnext_key->rcvid; key.type = TCP_KEY_AO; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8d941a6e066b..a9807eeb311c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -51,6 +51,18 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, return TCP_TW_SUCCESS; } +static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference(tcptw->ao_info); + if (unlikely(ao && seq < tcptw->tw_rcv_nxt)) + WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1); +#endif + tcptw->tw_rcv_nxt = seq; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -136,7 +148,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, /* FIN arrived, enter true time-wait state. */ tw->tw_substate = TCP_TIME_WAIT; - tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq); + if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent_stamp = ktime_get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cc899caf348e..4dcbc13e9ec8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1090,7 +1090,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, &key.ao_key, &key.traffic_key, &allocated_traffic_key, &key.rcv_next, &key.sne)) @@ -1167,6 +1167,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) /* rcv_next switches to our rcv_next */ rnext_key = READ_ONCE(ao_info->rnext_key); key.rcv_next = rnext_key->rcvid; + key.sne = READ_ONCE(ao_info->snd_sne); key.type = TCP_KEY_AO; #else if (0) { -- cgit v1.2.3 From 2717b5adea9e2558798c30eb0e93c01722edbb0a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:07 +0100 Subject: net/tcp: Add tcp_hash_fail() ratelimited logs Add a helper for logging connection-detailed messages for failed TCP hash verification (both MD5 and AO). Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 14 ++++++++++++-- include/net/tcp_ao.h | 29 +++++++++++++++++++++++++++++ net/ipv4/tcp.c | 23 +++++++++++++---------- net/ipv4/tcp_ao.c | 7 +++++++ 4 files changed, 61 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 50ae1ed244e5..54226d85feb8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2748,12 +2748,18 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, int l3index; /* Invalid option or two times meet any of auth options */ - if (tcp_parse_auth_options(th, &md5_location, &aoh)) + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); return SKB_DROP_REASON_TCP_AUTH_HDR; + } if (req) { if (tcp_rsk_used_ao(req) != !!aoh) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); return SKB_DROP_REASON_TCP_AOFAILURE; } } @@ -2770,10 +2776,14 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * the last key is impossible to remove, so there's * always at least one current_key. */ - if (tcp_ao_required(sk, saddr, family, true)) + if (tcp_ao_required(sk, saddr, family, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); return SKB_DROP_REASON_TCP_AONOTFOUND; + } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", family, skb, ""); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } return SKB_NOT_DROPPED_YET; diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 0c3516d1b968..4da6e3657913 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -118,6 +118,35 @@ struct tcp_ao_info { struct rcu_head rcu; }; +#define tcp_hash_fail(msg, family, skb, fmt, ...) \ +do { \ + const struct tcphdr *th = tcp_hdr(skb); \ + char hdr_flags[5] = {}; \ + char *f = hdr_flags; \ + \ + if (th->fin) \ + *f++ = 'F'; \ + if (th->syn) \ + *f++ = 'S'; \ + if (th->rst) \ + *f++ = 'R'; \ + if (th->ack) \ + *f++ = 'A'; \ + if (f != hdr_flags) \ + *f = ' '; \ + if ((family) == AF_INET) { \ + net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ + &ip_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } else { \ + net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ + &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } \ +} while (0) + #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb71212a09d8..1be6467a059a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4383,7 +4383,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. 
*/ - const struct tcphdr *th = tcp_hdr(skb); const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; u8 newhash[16]; @@ -4393,6 +4392,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4408,16 +4408,19 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); if (family == AF_INET) { - net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" - : "", l3index); + tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", + genhash ? "tcp_v4_calc_md5_hash failed" + : "", l3index); } else { - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", - genhash ? "failed" : "mismatch", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), l3index); + if (genhash) { + tcp_hash_fail("MD5 Hash failed", + AF_INET6, skb, "L3 index %d", + l3index); + } else { + tcp_hash_fail("MD5 Hash mismatch", + AF_INET6, skb, "L3 index %d", + l3index); + } } return SKB_DROP_REASON_TCP_MD5FAILURE; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7e14bcd4dfd4..f76fcb93499d 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -800,6 +800,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash wrong length", family, skb, + "%u != %d", maclen, tcp_ao_maclen(key)); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -814,6 +816,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash mismatch", family, skb, ""); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -841,6 +844,8 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, info = rcu_dereference(tcp_sk(sk)->ao_info); if (!info) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + tcp_hash_fail("AO key not found", family, skb, + "keyid: %u", aoh->keyid); return SKB_DROP_REASON_TCP_AOUNEXPECTED; } @@ -942,6 +947,8 @@ verify_hash: key_not_found: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); atomic64_inc(&info->counters.key_not_found); + tcp_hash_fail("Requested by the peer AO key id not found", + family, skb, ""); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } -- cgit v1.2.3 From 953af8e3acb68d2db11937cec3bc5da31de5c12e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:08 +0100 Subject: net/tcp: Ignore specific ICMPs for TCP-AO connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to IPsec, RFC5925 prescribes: ">> A TCP-AO implementation MUST default to ignore incoming ICMPv4 messages of Type 3 (destination unreachable), Codes 2-4 (protocol unreachable, port unreachable, and fragmentation needed -- ’hard errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 (administratively prohibited) and Code 4 (port unreachable) intended for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- WAIT-2, CLOSE-WAIT, CLOSING, 
LAST-ACK, TIME-WAIT) that match MKTs." A selftest (later in patch series) verifies that this attack is not possible in this TCP-AO implementation. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 11 ++++++++- include/uapi/linux/snmp.h | 1 + include/uapi/linux/tcp.h | 4 +++- net/ipv4/proc.c | 1 + net/ipv4/tcp_ao.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 7 ++++++ net/ipv6/tcp_ipv6.c | 7 ++++++ 7 files changed, 87 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 4da6e3657913..a9d38b9e8bcb 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -24,6 +24,7 @@ struct tcp_ao_counters { atomic64_t pkt_bad; atomic64_t key_not_found; atomic64_t ao_required; + atomic64_t dropped_icmp; }; struct tcp_ao_key { @@ -92,7 +93,8 @@ struct tcp_ao_info { struct tcp_ao_key *rnext_key; struct tcp_ao_counters counters; u32 ao_required :1, - __unused :31; + accept_icmps :1, + __unused :30; __be32 lisn; __be32 risn; /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ, @@ -191,6 +193,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk, bool twsk); void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, @@ -274,6 +277,12 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, { } +static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family, + int type, int code) +{ + return false; +} + static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, const struct tcp_ao_hdr *aoh) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 3d5ea841bffe..a0819c6a5988 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -301,6 +301,7 @@ enum LINUX_MIB_TCPAOBAD, /* TCPAOBad */ LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */ LINUX_MIB_TCPAOGOOD, /* TCPAOGood */ + LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 9c48964849d1..d8b2ea23f12a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -405,7 +405,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */ set_rnext :1, /* corresponding ::rnext */ ao_required :1, /* don't accept non-AO connects */ set_counters :1, /* set/clear ::pkt_* counters */ - reserved :28; /* must be 0 */ + accept_icmps :1, /* accept incoming ICMPs */ + reserved :27; /* must be 0 */ __u16 reserved2; /* padding, must be 0 */ __u8 current_key; /* KeyID to set as Current_key */ __u8 rnext; /* KeyID to set as Rnext_key */ @@ -413,6 +414,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */ __u64 pkt_bad; /* failed verification */ __u64 pkt_key_not_found; /* could not find a key to verify */ __u64 pkt_ao_required; /* segments missing TCP-AO sign */ + __u64 pkt_dropped_icmp; /* ICMPs that were ignored */ } __attribute__((aligned(8))); /* setsockopt(fd, IPPROTO_TCP, 
TCP_ZEROCOPY_RECEIVE, ...) */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f5b37ebc18c0..5f4654ebff48 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -303,6 +303,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD), SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND), SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD), + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index f76fcb93499d..223af5c9eaf3 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -15,6 +15,7 @@ #include #include +#include int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp) @@ -44,6 +45,60 @@ clear_hash: return 1; } +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) +{ + bool ignore_icmp = false; + struct tcp_ao_info *ao; + + /* RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. + */ + if (family == AF_INET) { + if (type != ICMP_DEST_UNREACH) + return false; + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED) + return false; + } else { + if (type != ICMPV6_DEST_UNREACH) + return false; + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH) + return false; + } + + rcu_read_lock(); + switch (sk->sk_state) { + case TCP_TIME_WAIT: + ao = rcu_dereference(tcp_twsk(sk)->ao_info); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: + case TCP_LISTEN: + case TCP_NEW_SYN_RECV: + /* RFC5925 specifies to ignore ICMPs *only* on connections + * in synchronized states. + */ + rcu_read_unlock(); + return false; + default: + ao = rcu_dereference(tcp_sk(sk)->ao_info); + } + + if (ao && !ao->accept_icmps) { + ignore_icmp = true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS); + atomic64_inc(&ao->counters.dropped_icmp); + } + rcu_read_unlock(); + + return ignore_icmp; +} + /* Optimized version of tcp_ao_do_lookup(): only for sockets for which * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. 
@@ -1086,6 +1141,7 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, new_ao->lisn = htonl(tcp_rsk(req)->snt_isn); new_ao->risn = htonl(tcp_rsk(req)->rcv_isn); new_ao->ao_required = ao->ao_required; + new_ao->accept_icmps = ao->accept_icmps; if (family == AF_INET) { addr = (union tcp_ao_addr *)&newsk->sk_daddr; @@ -1792,9 +1848,11 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad); atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found); atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required); + atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp); } ao_info->ao_required = cmd.ao_required; + ao_info->accept_icmps = cmd.accept_icmps; if (new_current) WRITE_ONCE(ao_info->current_key, new_current); if (new_rnext) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bdec99707028..8f98c58e2689 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -507,6 +509,11 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4dcbc13e9ec8..fa7050579e9a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -396,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -406,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); -- cgit v1.2.3 From 7753c2f0a857bfa6501e67deee03988dd0bcaae7 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:09 +0100 Subject: net/tcp: Add option for TCP-AO to (not) hash header Provide setsockopt() key flag that makes TCP-AO exclude hashing TCP header for peers that match the key. This is needed for interraction with middleboxes that may change TCP options, see RFC5925 (9.2). Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 5 +++++ net/ipv4/tcp_ao.c | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index d8b2ea23f12a..320aab010f9a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -367,6 +367,11 @@ struct tcp_diag_md5sig { #define TCP_AO_MAXKEYLEN 80 #define TCP_AO_KEYF_IFINDEX (1 << 0) /* L3 ifindex for VRF */ +#define TCP_AO_KEYF_EXCLUDE_OPT (1 << 1) /* "Indicates whether TCP + * options other than TCP-AO + * are included in the MAC + * calculation" + */ struct tcp_ao_add { /* setsockopt(TCP_AO_ADD_KEY) */ struct __kernel_sockaddr_storage addr; /* peer's address for the key */ diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 223af5c9eaf3..10cc6be4d537 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -562,7 +562,8 @@ int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, WARN_ON_ONCE(1); goto clear_hash; } - if (tcp_ao_hash_header(&hp, th, false, + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), ao_hash, hash_offset, tcp_ao_maclen(key))) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); @@ -610,7 +611,8 @@ int tcp_ao_hash_skb(unsigned short int family, goto clear_hash; if (tcp_ao_hash_pseudoheader(family, sk, skb, &hp, skb->len)) goto clear_hash; - if (tcp_ao_hash_header(&hp, th, false, + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), ao_hash, hash_offset, tcp_ao_maclen(key))) goto clear_hash; if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) @@ -1454,7 +1456,7 @@ static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) return ERR_PTR(-ESOCKTNOSUPPORT); } -#define TCP_AO_KEYF_ALL (0) +#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_EXCLUDE_OPT) static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, struct tcp_ao_add *cmd) -- cgit v1.2.3 From ef84703a911f4ee52ca585e8308b7084093941f4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:10 +0100 Subject: net/tcp: Add TCP-AO getsockopt()s Introduce getsockopt(TCP_AO_GET_KEYS) that lets a user get TCP-AO keys and their properties from a socket. The user can provide a filter to match the specific key to be dumped or ::get_all = 1 may be used to dump all keys in one syscall. Add another getsockopt(TCP_AO_INFO) for providing per-socket/per-ao_info stats: packet counters, Current_key/RNext_key and flags like ::ao_required and ::accept_icmps. Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp_ao.h | 12 ++ include/uapi/linux/tcp.h | 63 +++++++--- net/ipv4/tcp.c | 13 +++ net/ipv4/tcp_ao.c | 295 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 369 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a9d38b9e8bcb..061c358a3c8a 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -194,6 +194,8 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, void tcp_ao_destroy_sock(struct sock *sk, bool twsk); void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, @@ -316,6 +318,16 @@ static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, static inline void tcp_ao_connect_init(struct sock *sk) { } + +static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} #endif #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 320aab010f9a..201b3cbd6020 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -131,7 +131,8 @@ enum { #define TCP_AO_ADD_KEY 38 /* Add/Set MKT */ #define TCP_AO_DEL_KEY 39 /* Delete MKT */ -#define TCP_AO_INFO 40 /* Modify TCP-AO per-socket options */ +#define TCP_AO_INFO 40 /* Set/list TCP-AO per-socket options */ +#define TCP_AO_GET_KEYS 41 /* List MKT(s) */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 @@ -405,21 +406,55 @@ struct tcp_ao_del { /* setsockopt(TCP_AO_DEL_KEY) */ __u8 keyflags; /* see TCP_AO_KEYF_ */ } __attribute__((aligned(8))); -struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */ - __u32 set_current :1, /* corresponding ::current_key */ - set_rnext :1, /* corresponding ::rnext */ - ao_required :1, /* don't accept non-AO connects */ - set_counters :1, /* set/clear ::pkt_* counters */ - accept_icmps :1, /* accept incoming ICMPs */ +struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO), getsockopt(TCP_AO_INFO) */ + /* Here 'in' is for setsockopt(), 'out' is for getsockopt() */ + __u32 set_current :1, /* in/out: corresponding ::current_key */ + set_rnext :1, /* in/out: corresponding ::rnext */ + ao_required :1, /* in/out: don't accept non-AO connects */ + set_counters :1, /* in: set/clear ::pkt_* counters */ + accept_icmps :1, /* in/out: accept incoming ICMPs */ reserved :27; /* must be 0 */ __u16 reserved2; /* padding, must be 0 */ - __u8 current_key; /* KeyID to set as Current_key */ - __u8 rnext; /* KeyID to set as Rnext_key */ - __u64 pkt_good; /* verified segments */ - __u64 pkt_bad; /* failed verification */ - __u64 pkt_key_not_found; /* could not find a key to verify */ - __u64 pkt_ao_required; /* segments missing TCP-AO sign */ - __u64 pkt_dropped_icmp; /* ICMPs that were ignored */ + __u8 current_key; /* in/out: KeyID of Current_key */ + __u8 rnext; /* in/out: keyid of RNext_key */ + __u64 pkt_good; /* in/out: verified segments */ + __u64 pkt_bad; /* in/out: failed verification */ + __u64 pkt_key_not_found; /* in/out: could not find a key to 
verify */ + __u64 pkt_ao_required; /* in/out: segments missing TCP-AO sign */ + __u64 pkt_dropped_icmp; /* in/out: ICMPs that were ignored */ +} __attribute__((aligned(8))); + +struct tcp_ao_getsockopt { /* getsockopt(TCP_AO_GET_KEYS) */ + struct __kernel_sockaddr_storage addr; /* in/out: dump keys for peer + * with this address/prefix + */ + char alg_name[64]; /* out: crypto hash algorithm */ + __u8 key[TCP_AO_MAXKEYLEN]; + __u32 nkeys; /* in: size of the userspace buffer + * @optval, measured in @optlen - the + * sizeof(struct tcp_ao_getsockopt) + * out: number of keys that matched + */ + __u16 is_current :1, /* in: match and dump Current_key, + * out: the dumped key is Current_key + */ + + is_rnext :1, /* in: match and dump RNext_key, + * out: the dumped key is RNext_key + */ + get_all :1, /* in: dump all keys */ + reserved :13; /* padding, must be 0 */ + __u8 sndid; /* in/out: dump keys with SendID */ + __u8 rcvid; /* in/out: dump keys with RecvID */ + __u8 prefix; /* in/out: dump keys with address/prefix */ + __u8 maclen; /* out: key's length of authentication + * code (hash) + */ + __u8 keyflags; /* in/out: see TCP_AO_KEYF_ */ + __u8 keylen; /* out: length of ::key */ + __s32 ifindex; /* in/out: L3 dev index for VRF */ + __u64 pkt_good; /* out: verified segments */ + __u64 pkt_bad; /* out: segments that failed verification */ } __attribute__((aligned(8))); /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1be6467a059a..ce33eee9d0f2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4284,6 +4284,19 @@ zerocopy_rcv_out: return err; } #endif + case TCP_AO_GET_KEYS: + case TCP_AO_INFO: { + int err; + + sockopt_lock_sock(sk); + if (optname == TCP_AO_GET_KEYS) + err = tcp_ao_get_mkts(sk, optval, optlen); + else + err = tcp_ao_get_sock_info(sk, optval, optlen); + sockopt_release_sock(sk); + + return err; + } default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 10cc6be4d537..cbc1ee0f5b9a 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1894,3 +1894,298 @@ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) return tcp_parse_ao(sk, cmd, AF_INET, optval, optlen); } +/* tcp_ao_copy_mkts_to_user(ao_info, optval, optlen) + * + * @ao_info: struct tcp_ao_info on the socket that + * socket getsockopt(TCP_AO_GET_KEYS) is executed on + * @optval: pointer to array of tcp_ao_getsockopt structures in user space. + * Must be != NULL. + * @optlen: pointer to size of tcp_ao_getsockopt structure. + * Must be != NULL. + * + * Return value: 0 on success, a negative error number otherwise. + * + * optval points to an array of tcp_ao_getsockopt structures in user space. + * optval[0] is used as both input and output to getsockopt. It determines + * which keys are returned by the kernel. + * optval[0].nkeys is the size of the array in user space. On return it contains + * the number of keys matching the search criteria. + * If tcp_ao_getsockopt::get_all is set, then all keys in the socket are + * returned, otherwise only keys matching + * in optval[0] are returned. + * optlen is also used as both input and output. The user provides the size + * of struct tcp_ao_getsockopt in user space, and the kernel returns the size + * of the structure in kernel space. + * The size of struct tcp_ao_getsockopt may differ between user and kernel. + * There are three cases to consider: + * * If usize == ksize, then keys are copied verbatim. 
+ * * If usize < ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in optval[0] + * (ksize - usize) are interpreted as 0 by the kernel. + * * If usize > ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (usize - ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * On return the kernel fills in min(usize, ksize) in each entry of the array. + * The layout of the fields in the user and kernel structures is expected to + * be the same (including in the 32bit vs 64bit case). + */ +static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, + sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_getsockopt opt_in, opt_out; + struct tcp_ao_key *key, *current_key; + bool do_address_matching = true; + union tcp_ao_addr *addr = NULL; + unsigned int max_keys; /* maximum number of keys to copy to user */ + size_t out_offset = 0; + size_t bytes_to_write; /* number of bytes to write to user level */ + int err, user_len; + u32 matched_keys; /* keys from ao_info matched so far */ + int optlen_out; + __be16 port = 0; + + if (copy_from_sockptr(&user_len, optlen, sizeof(int))) + return -EFAULT; + + if (user_len <= 0) + return -EINVAL; + + memset(&opt_in, 0, sizeof(struct tcp_ao_getsockopt)); + err = copy_struct_from_sockptr(&opt_in, sizeof(opt_in), + optval, user_len); + if (err < 0) + return err; + + if (opt_in.pkt_good || opt_in.pkt_bad) + return -EINVAL; + + if (opt_in.reserved != 0) + return -EINVAL; + + max_keys = opt_in.nkeys; + + if (opt_in.get_all || opt_in.is_current || opt_in.is_rnext) { + if (opt_in.get_all && (opt_in.is_current || opt_in.is_rnext)) + return -EINVAL; + do_address_matching = false; + } + + switch (opt_in.addr.ss_family) { + case AF_INET: { + struct sockaddr_in *sin; + __be32 mask; + + sin = (struct sockaddr_in *)&opt_in.addr; + port = sin->sin_port; + addr = (union tcp_ao_addr *)&sin->sin_addr; + + if (opt_in.prefix > 32) + return -EINVAL; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY && + opt_in.prefix != 0) + return -EINVAL; + + mask = inet_make_mask(opt_in.prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6; + struct in6_addr *addr6; + + sin6 = (struct sockaddr_in6 *)&opt_in.addr; + addr = (union tcp_ao_addr *)&sin6->sin6_addr; + addr6 = &sin6->sin6_addr; + port = sin6->sin6_port; + + /* We don't have to change family and @addr here if + * ipv6_addr_v4mapped() like in key adding: + * tcp_ao_key_cmp() does it. Do the sanity checks though. 
+ */ + if (opt_in.prefix != 0) { + if (ipv6_addr_v4mapped(addr6)) { + __be32 mask, addr4 = addr6->s6_addr32[3]; + + if (opt_in.prefix > 32 || + ntohl(addr4) == INADDR_ANY) + return -EINVAL; + mask = inet_make_mask(opt_in.prefix); + if (addr4 & ~mask) + return -EINVAL; + } else { + struct in6_addr pfx; + + if (ipv6_addr_any(addr6) || + opt_in.prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr6, opt_in.prefix); + if (ipv6_addr_cmp(&pfx, addr6)) + return -EINVAL; + } + } else if (!ipv6_addr_any(addr6)) { + return -EINVAL; + } + break; + } + case 0: + if (!do_address_matching) + break; + fallthrough; + default: + return -EAFNOSUPPORT; + } + + if (!do_address_matching) { + /* We could just ignore those, but let's do stricter checks */ + if (addr || port) + return -EINVAL; + if (opt_in.prefix || opt_in.sndid || opt_in.rcvid) + return -EINVAL; + } + + bytes_to_write = min_t(int, user_len, sizeof(struct tcp_ao_getsockopt)); + matched_keys = 0; + /* May change in RX, while we're dumping, pre-fetch it */ + current_key = READ_ONCE(ao_info->current_key); + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (opt_in.get_all) + goto match; + + if (opt_in.is_current || opt_in.is_rnext) { + if (opt_in.is_current && key == current_key) + goto match; + if (opt_in.is_rnext && key == ao_info->rnext_key) + goto match; + continue; + } + + if (tcp_ao_key_cmp(key, addr, opt_in.prefix, + opt_in.addr.ss_family, + opt_in.sndid, opt_in.rcvid) != 0) + continue; +match: + matched_keys++; + if (matched_keys > max_keys) + continue; + + memset(&opt_out, 0, sizeof(struct tcp_ao_getsockopt)); + + if (key->family == AF_INET) { + struct sockaddr_in *sin_out = (struct sockaddr_in *)&opt_out.addr; + + sin_out->sin_family = key->family; + sin_out->sin_port = 0; + memcpy(&sin_out->sin_addr, &key->addr, sizeof(struct in_addr)); + } else { + struct sockaddr_in6 *sin6_out = (struct sockaddr_in6 *)&opt_out.addr; + + sin6_out->sin6_family = key->family; + sin6_out->sin6_port = 0; + memcpy(&sin6_out->sin6_addr, &key->addr, sizeof(struct in6_addr)); + } + opt_out.sndid = key->sndid; + opt_out.rcvid = key->rcvid; + opt_out.prefix = key->prefixlen; + opt_out.keyflags = key->keyflags; + opt_out.is_current = (key == current_key); + opt_out.is_rnext = (key == ao_info->rnext_key); + opt_out.nkeys = 0; + opt_out.maclen = key->maclen; + opt_out.keylen = key->keylen; + opt_out.pkt_good = atomic64_read(&key->pkt_good); + opt_out.pkt_bad = atomic64_read(&key->pkt_bad); + memcpy(&opt_out.key, key->key, key->keylen); + tcp_sigpool_algo(key->tcp_sigpool_id, opt_out.alg_name, 64); + + /* Copy key to user */ + if (copy_to_sockptr_offset(optval, out_offset, + &opt_out, bytes_to_write)) + return -EFAULT; + out_offset += user_len; + } + + optlen_out = (int)sizeof(struct tcp_ao_getsockopt); + if (copy_to_sockptr(optlen, &optlen_out, sizeof(int))) + return -EFAULT; + + out_offset = offsetof(struct tcp_ao_getsockopt, nkeys); + if (copy_to_sockptr_offset(optval, out_offset, + &matched_keys, sizeof(u32))) + return -EFAULT; + + return 0; +} + +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info *ao_info; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen); +} + +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info_opt out, in = {}; + struct tcp_ao_key *current_key; + struct tcp_ao_info *ao; + int err, len; + + if 
(copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + /* Copying this "in" only to check ::reserved, ::reserved2, + * that may be needed to extend (struct tcp_ao_info_opt) and + * what getsockopt() provides in future. + */ + err = copy_struct_from_sockptr(&in, sizeof(in), optval, len); + if (err) + return err; + + if (in.reserved != 0 || in.reserved2 != 0) + return -EINVAL; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + memset(&out, 0, sizeof(out)); + out.ao_required = ao->ao_required; + out.accept_icmps = ao->accept_icmps; + out.pkt_good = atomic64_read(&ao->counters.pkt_good); + out.pkt_bad = atomic64_read(&ao->counters.pkt_bad); + out.pkt_key_not_found = atomic64_read(&ao->counters.key_not_found); + out.pkt_ao_required = atomic64_read(&ao->counters.ao_required); + out.pkt_dropped_icmp = atomic64_read(&ao->counters.dropped_icmp); + + current_key = READ_ONCE(ao->current_key); + if (current_key) { + out.set_current = 1; + out.current_key = current_key->sndid; + } + if (ao->rnext_key) { + out.set_rnext = 1; + out.rnext = ao->rnext_key->rcvid; + } + + if (copy_to_sockptr(optval, &out, min_t(int, len, sizeof(out)))) + return -EFAULT; + + return 0; +} + -- cgit v1.2.3 From d6732b95b6fbbc6d5bb9d2f809e275763640c4a2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:11 +0100 Subject: net/tcp: Allow asynchronous delete for TCP-AO keys (MKTs) Delete becomes very, very fast - almost free, but after setsockopt() syscall returns, the key is still alive until next RCU grace period. Which is fine for listen sockets as userspace needs to be aware of setsockopt(TCP_AO) and accept() race and resolve it with verification by getsockopt() after TCP connection was accepted. The benchmark results (on non-loaded box, worse with more RCU work pending): > ok 33 Worst case delete 16384 keys: min=5ms max=10ms mean=6.93904ms stddev=0.263421 > ok 34 Add a new key 16384 keys: min=1ms max=4ms mean=2.17751ms stddev=0.147564 > ok 35 Remove random-search 16384 keys: min=5ms max=10ms mean=6.50243ms stddev=0.254999 > ok 36 Remove async 16384 keys: min=0ms max=0ms mean=0.0296107ms stddev=0.0172078 Co-developed-by: Francesco Ruggeri Signed-off-by: Francesco Ruggeri Co-developed-by: Salam Noureddine Signed-off-by: Salam Noureddine Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 3 ++- net/ipv4/tcp_ao.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 201b3cbd6020..be34d7c5c531 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -396,7 +396,8 @@ struct tcp_ao_del { /* setsockopt(TCP_AO_DEL_KEY) */ __s32 ifindex; /* L3 dev index for VRF */ __u32 set_current :1, /* corresponding ::current_key */ set_rnext :1, /* corresponding ::rnext */ - reserved :30; /* must be 0 */ + del_async :1, /* only valid for listen sockets */ + reserved :29; /* must be 0 */ __u16 reserved2; /* padding, must be 0 */ __u8 prefix; /* peer's address prefix */ __u8 sndid; /* SendID for outgoing segments */ diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index cbc1ee0f5b9a..acbeb635fe29 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1628,7 +1628,7 @@ err_free_ao: } static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, - struct tcp_ao_key *key, + bool del_async, struct tcp_ao_key *key, struct tcp_ao_key *new_current, struct tcp_ao_key *new_rnext) { @@ -1636,11 +1636,24 @@ static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, hlist_del_rcu(&key->node); + /* Support for async delete on listening sockets: as they don't + * need current_key/rnext_key maintaining, we don't need to check + * them and we can just free all resources in RCU fashion. + */ + if (del_async) { + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + return 0; + } + /* At this moment another CPU could have looked this key up * while it was unlinked from the list. Wait for RCU grace period, * after which the key is off-list and can't be looked up again; * the rx path [just before RCU came] might have used it and set it * as current_key (very unlikely). + * Free the key with next RCU grace period (in case it was + * current_key before tcp_ao_current_rnext() might have + * changed it in forced-delete). */ synchronize_rcu(); if (new_current) @@ -1711,6 +1724,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, if (!new_rnext) return -ENOENT; } + if (cmd.del_async && sk->sk_state != TCP_LISTEN) + return -EINVAL; if (family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr; @@ -1758,8 +1773,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, if (key == new_current || key == new_rnext) continue; - return tcp_ao_delete_key(sk, ao_info, key, - new_current, new_rnext); + return tcp_ao_delete_key(sk, ao_info, cmd.del_async, key, + new_current, new_rnext); } return -ENOENT; } -- cgit v1.2.3 From 67fa83f7c86a86913ab9cd5a13b4bebd8d2ebb43 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:12 +0100 Subject: net/tcp: Add static_key for TCP-AO Similarly to TCP-MD5, add a static key to TCP-AO that is patched out when there are no keys on a machine and dynamically enabled with the first setsockopt(TCP_AO) adds a key on any socket. The static key is as well dynamically disabled later when the socket is destructed. The lifetime of enabled static key here is the same as ao_info: it is enabled on allocation, passed over from full socket to twsk and destructed when ao_info is scheduled for destruction. Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp.h | 24 ++++++++++++++++-------- include/net/tcp_ao.h | 2 ++ net/ipv4/tcp_ao.c | 22 ++++++++++++++++++++++ net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 25 ++++++++++++++----------- net/ipv6/tcp_ipv6.c | 25 ++++++++++++++----------- 6 files changed, 96 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 54226d85feb8..e3617c433cf1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2288,14 +2288,18 @@ static inline void tcp_get_current_key(const struct sock *sk, #if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) const struct tcp_sock *tp = tcp_sk(sk); #endif -#ifdef CONFIG_TCP_AO - struct tcp_ao_info *ao; - ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held(sk)); - if (ao) { - out->ao_key = READ_ONCE(ao->current_key); - out->type = TCP_KEY_AO; - return; +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key)) { + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + out->ao_key = READ_ONCE(ao->current_key); + out->type = TCP_KEY_AO; + return; + } } #endif #ifdef CONFIG_TCP_MD5SIG @@ -2324,7 +2328,8 @@ static inline bool tcp_key_is_md5(const struct tcp_key *key) static inline bool tcp_key_is_ao(const struct tcp_key *key) { #ifdef CONFIG_TCP_AO - if (key->type == TCP_KEY_AO) + if (static_branch_unlikely(&tcp_ao_needed.key) && + key->type == TCP_KEY_AO) return true; #endif return false; @@ -2718,6 +2723,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, struct tcp_ao_info *ao_info; struct tcp_ao_key *ao_key; + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk)); if (!ao_info) diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 061c358a3c8a..a38408072ea8 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -151,6 +151,8 @@ do { \ #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ +#include +extern struct static_key_false_deferred tcp_ao_needed; struct tcp4_ao_context { __be32 saddr; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index acbeb635fe29..ffce8ca60ff2 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -17,6 +17,8 @@ #include #include +DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); + int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp) { @@ -50,6 +52,9 @@ bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) bool ignore_icmp = false; struct tcp_ao_info *ao; + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + /* RFC5925, 7.8: * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 * messages of Type 3 (destination unreachable), Codes 2-4 (protocol @@ -185,6 +190,9 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, struct tcp_ao_key *key; struct tcp_ao_info *ao; + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return NULL; + ao = rcu_dereference_check(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk)); if (!ao) @@ -276,6 +284,7 @@ void tcp_ao_destroy_sock(struct sock *sk, bool twsk) } kfree_rcu(ao, rcu); + static_branch_slow_dec_deferred(&tcp_ao_needed); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) @@ -1180,6 +1189,11 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, goto free_and_exit; } + if 
(!static_key_fast_inc_not_disabled(&tcp_ao_needed.key.key)) { + ret = -EUSERS; + goto free_and_exit; + } + key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); @@ -1607,6 +1621,10 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, tcp_ao_link_mkt(ao_info, key); if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + ret = -EUSERS; + goto err_free_sock; + } sk_gso_disable(sk); rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); } @@ -1875,6 +1893,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, if (new_rnext) WRITE_ONCE(ao_info->rnext_key, new_rnext); if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + err = -EUSERS; + goto out; + } sk_gso_disable(sk); rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ca72185f14..50aaa1527150 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3572,41 +3572,55 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin)); } -/* If we update tp->snd_una, also update tp->bytes_acked */ -static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) +static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) { - u32 delta = ack - tp->snd_una; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao; -#endif - sock_owned_by_me((struct sock *)tp); - tp->bytes_acked += delta; -#ifdef CONFIG_TCP_AO + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); if (ao && ack < tp->snd_una) ao->snd_sne++; #endif +} + +/* If we update tp->snd_una, also update tp->bytes_acked */ +static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) +{ + u32 delta = ack - tp->snd_una; + + sock_owned_by_me((struct sock *)tp); + tp->bytes_acked += delta; + tcp_snd_sne_update(tp, ack); tp->snd_una = ack; } -/* If we update tp->rcv_nxt, also update tp->bytes_received */ -static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) { - u32 delta = seq - tp->rcv_nxt; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao; -#endif - sock_owned_by_me((struct sock *)tp); - tp->bytes_received += delta; -#ifdef CONFIG_TCP_AO + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); if (ao && seq < tp->rcv_nxt) ao->rcv_sne++; #endif +} + +/* If we update tp->rcv_nxt, also update tp->bytes_received */ +static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +{ + u32 delta = seq - tp->rcv_nxt; + + sock_owned_by_me((struct sock *)tp); + tp->bytes_received += delta; + tcp_rcv_sne_update(tp, seq); WRITE_ONCE(tp->rcv_nxt, seq); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f98c58e2689..18c5595e3814 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1024,18 +1024,20 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; - /* FIXME: the segment to-be-acked is not verified yet */ - ao_info = rcu_dereference(tcptw->ao_info); - if (ao_info) { - const struct tcp_ao_hdr *aoh; + if (static_branch_unlikely(&tcp_ao_needed.key)) { + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + if 
(tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) { + inet_twsk_put(tw); + return; + } - if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) { - inet_twsk_put(tw); - return; + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); } - - if (aoh) - key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); } if (key.ao_key) { struct tcp_ao_key *rnext_key; @@ -1081,7 +1083,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_sk(sk)->snd_nxt; #ifdef CONFIG_TCP_AO - if (tcp_rsk_used_ao(req)) { + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { const union tcp_md5_addr *addr; const struct tcp_ao_hdr *aoh; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fa7050579e9a..b5936294dba2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1154,17 +1154,19 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; - /* FIXME: the segment to-be-acked is not verified yet */ - ao_info = rcu_dereference(tcptw->ao_info); - if (ao_info) { - const struct tcp_ao_hdr *aoh; + if (static_branch_unlikely(&tcp_ao_needed.key)) { - /* Invalid TCP option size or twice included auth */ - if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) - goto out; - if (aoh) { - key.ao_key = tcp_ao_established_key(ao_info, - aoh->rnext_keyid, -1); + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto out; + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, + aoh->rnext_keyid, -1); } } if (key.ao_key) { @@ -1206,7 +1208,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct tcp_key key = {}; #ifdef CONFIG_TCP_AO - if (tcp_rsk_used_ao(req)) { + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; const struct tcp_ao_hdr *aoh; int l3index; -- cgit v1.2.3 From 248411b8cb8974a1e1c8e43123c1e682fbd64969 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:13 +0100 Subject: net/tcp: Wire up l3index to TCP-AO Similarly how TCP_MD5SIG_FLAG_IFINDEX works for TCP-MD5, TCP_AO_KEYF_IFINDEX is an AO-key flag that binds that MKT to a specified by L3 ifinndex. Similarly, without this flag the key will work in the default VRF l3index = 0 for connections. 
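For illustration only (not part of this patch): a userspace sketch of installing such a VRF-bound MKT with setsockopt(TCP_AO_ADD_KEY). The struct tcp_ao_add field names follow this series' uapi header (only the first field is visible in the hunks quoted here, so treat the rest as an assumption); the algorithm string, key IDs and VRF name below are made-up example values.

/* Sketch: add a TCP-AO key that only matches traffic on one L3 device. */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>
#include <linux/tcp.h>

static int tcp_ao_add_vrf_key(int sk, const struct sockaddr_in *peer,
			      const char *vrf_name,
			      const void *secret, __u8 keylen)
{
	struct tcp_ao_add add;

	if (keylen > TCP_AO_MAXKEYLEN)
		return -1;

	memset(&add, 0, sizeof(add));
	memcpy(&add.addr, peer, sizeof(*peer));
	strcpy(add.alg_name, "hmac(sha1)");	/* crypto API name of the MAC */
	add.keyflags = TCP_AO_KEYF_IFINDEX;	/* bind the MKT to one L3 ifindex */
	add.ifindex = if_nametoindex(vrf_name);	/* e.g. the VRF master device */
	add.prefix = 32;			/* exact peer address match */
	add.sndid = 100;
	add.rcvid = 100;
	add.keylen = keylen;
	memcpy(add.key, secret, keylen);

	return setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &add, sizeof(add));
}

Without TCP_AO_KEYF_IFINDEX (and with ::ifindex left 0) the same call would install a key for the default VRF, l3index = 0, as described above.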
To prevent AO-keys from overlapping, it's restricted to add key B for a socket that has key A, which have the same sndid/rcvid and one of the following is true: - !(A.keyflags & TCP_AO_KEYF_IFINDEX) or !(B.keyflags & TCP_AO_KEYF_IFINDEX) so that any key is non-bound to a VRF - A.l3index == B.l3index both want to work for the same VRF Additionally, it's restricted to match TCP-MD5 keys for the same peer the following way: |--------------|--------------------|----------------|---------------| | | MD5 key without | MD5 key | MD5 key | | | l3index | l3index=0 | l3index=N | |--------------|--------------------|----------------|---------------| | TCP-AO key | | | | | without | reject | reject | reject | | l3index | | | | |--------------|--------------------|----------------|---------------| | TCP-AO key | | | | | l3index=0 | reject | reject | allow | |--------------|--------------------|----------------|---------------| | TCP-AO key | | | | | l3index=N | reject | allow | reject | |--------------|--------------------|----------------|---------------| This is done with the help of tcp_md5_do_lookup_any_l3index() to reject adding AO key without TCP_AO_KEYF_IFINDEX if there's TCP-MD5 in any VRF. This is important for case where sysctl_tcp_l3mdev_accept = 1 Similarly, for TCP-AO lookups tcp_ao_do_lookup() may be used with l3index < 0, so that __tcp_ao_key_cmp() will match TCP-AO key in any VRF. Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 11 ++-- include/net/tcp_ao.h | 18 +++--- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ao.c | 170 +++++++++++++++++++++++++++++++++++++------------- net/ipv4/tcp_ipv4.c | 10 ++- net/ipv6/syncookies.c | 5 +- net/ipv6/tcp_ao.c | 21 +++---- net/ipv6/tcp_ipv6.c | 15 +++-- 8 files changed, 177 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e3617c433cf1..d2f0736b76b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2717,7 +2717,7 @@ static inline int tcp_parse_auth_options(const struct tcphdr *th, } static inline bool tcp_ao_required(struct sock *sk, const void *saddr, - int family, bool stat_inc) + int family, int l3index, bool stat_inc) { #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -2731,7 +2731,7 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, if (!ao_info) return false; - ao_key = tcp_ao_do_lookup(sk, saddr, family, -1, -1); + ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1); if (ao_info->ao_required || ao_key) { if (stat_inc) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); @@ -2784,21 +2784,22 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * the last key is impossible to remove, so there's * always at least one current_key. 
*/ - if (tcp_ao_required(sk, saddr, family, true)) { + if (tcp_ao_required(sk, saddr, family, l3index, true)) { tcp_hash_fail("AO hash is required, but not found", family, skb, "L3 index %d", l3index); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", family, skb, ""); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } return SKB_NOT_DROPPED_YET; } if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, aoh); + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, l3index, md5_location); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index a38408072ea8..edd6748b2cfa 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -33,6 +33,7 @@ struct tcp_ao_key { u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align; unsigned int tcp_sigpool_id; unsigned int digest_size; + int l3index; u8 prefixlen; u8 family; u8 keylen; @@ -200,10 +201,10 @@ int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, - const struct request_sock *req, + const struct request_sock *req, int l3index, const struct tcp_ao_hdr *aoh); u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); -struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, @@ -245,9 +246,6 @@ int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, __be32 disn, bool send); int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, struct request_sock *req); -struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int sndid, int rcvid); struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid); struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, @@ -265,7 +263,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); void tcp_ao_connect_init(struct sock *sk); void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, struct tcp_request_sock *treq, - unsigned short int family); + unsigned short int family, int l3index); #else /* CONFIG_TCP_AO */ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, @@ -277,7 +275,7 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, struct tcp_request_sock *treq, - unsigned short int family) + unsigned short int family, int l3index) { } @@ -289,13 +287,15 @@ static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family, static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, - const struct request_sock *req, const struct tcp_ao_hdr *aoh) + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh) { return SKB_NOT_DROPPED_YET; } static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, - const union tcp_ao_addr *addr, int family, int sndid, int rcvid) + int l3index, 
const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) { return NULL; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0681d3e82b11..98b25e5d147b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -344,6 +344,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; struct flowi4 fl4; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -400,13 +401,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->tfo_listener = false; - tcp_ao_syncookie(sk, skb, treq, AF_INET); - if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); + /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index ffce8ca60ff2..b5ac3e73e1da 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -136,7 +136,7 @@ static int ipv4_prefix_cmp(const struct in_addr *addr1, return memcmp(&a1, &a2, sizeof(a1)); } -static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, +static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, const union tcp_ao_addr *addr, u8 prefixlen, int family, int sndid, int rcvid) { @@ -144,6 +144,10 @@ static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, return (key->sndid > sndid) ? 1 : -1; if (rcvid >= 0 && key->rcvid != rcvid) return (key->rcvid > rcvid) ? 1 : -1; + if (l3index >= 0 && (key->keyflags & TCP_AO_KEYF_IFINDEX)) { + if (key->l3index != l3index) + return (key->l3index > l3index) ? 
1 : -1; + } if (family == AF_UNSPEC) return 0; @@ -168,7 +172,7 @@ static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, return -1; } -static int tcp_ao_key_cmp(const struct tcp_ao_key *key, +static int tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, const union tcp_ao_addr *addr, u8 prefixlen, int family, int sndid, int rcvid) { @@ -176,14 +180,16 @@ static int tcp_ao_key_cmp(const struct tcp_ao_key *key, if (family == AF_INET6 && ipv6_addr_v4mapped(&addr->a6)) { __be32 addr4 = addr->a6.s6_addr32[3]; - return __tcp_ao_key_cmp(key, (union tcp_ao_addr *)&addr4, + return __tcp_ao_key_cmp(key, l3index, + (union tcp_ao_addr *)&addr4, prefixlen, AF_INET, sndid, rcvid); } #endif - return __tcp_ao_key_cmp(key, addr, prefixlen, family, sndid, rcvid); + return __tcp_ao_key_cmp(key, l3index, addr, + prefixlen, family, sndid, rcvid); } -static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, +static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index, const union tcp_ao_addr *addr, int family, u8 prefix, int sndid, int rcvid) { @@ -201,17 +207,18 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, hlist_for_each_entry_rcu(key, &ao->head, node) { u8 prefixlen = min(prefix, key->prefixlen); - if (!tcp_ao_key_cmp(key, addr, prefixlen, family, sndid, rcvid)) + if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen, + family, sndid, rcvid)) return key; } return NULL; } -struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, const union tcp_ao_addr *addr, int family, int sndid, int rcvid) { - return __tcp_ao_do_lookup(sk, addr, family, U8_MAX, sndid, rcvid); + return __tcp_ao_do_lookup(sk, l3index, addr, family, U8_MAX, sndid, rcvid); } static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) @@ -677,18 +684,22 @@ struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, struct request_sock *req, int sndid, int rcvid) { - union tcp_ao_addr *addr = - (union tcp_ao_addr *)&inet_rsk(req)->ir_rmt_addr; + struct inet_request_sock *ireq = inet_rsk(req); + union tcp_ao_addr *addr = (union tcp_ao_addr *)&ireq->ir_rmt_addr; + int l3index; - return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); } struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid) { + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); union tcp_ao_addr *addr = (union tcp_ao_addr *)&addr_sk->sk_daddr; - return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); + return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); } int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, @@ -738,7 +749,8 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, ao_info = rcu_dereference(tcp_sk(sk)->ao_info); if (!ao_info) return -ENOENT; - *key = tcp_ao_do_lookup(sk, addr, family, -1, aoh->rnext_keyid); + *key = tcp_ao_do_lookup(sk, l3index, addr, family, + -1, aoh->rnext_keyid); if (!*key) return -ENOENT; *traffic_key = kmalloc(tcp_ao_digest_size(*key), GFP_ATOMIC); @@ -814,24 +826,26 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, const struct sock *sk, const struct sk_buff *skb, - int sndid, int rcvid) + int sndid, int rcvid, int l3index) { if (family == AF_INET) 
{ const struct iphdr *iph = ip_hdr(skb); - return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)&iph->saddr, - AF_INET, sndid, rcvid); + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET, sndid, rcvid); } else { const struct ipv6hdr *iph = ipv6_hdr(skb); - return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)&iph->saddr, - AF_INET6, sndid, rcvid); + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET6, sndid, rcvid); } } void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, struct tcp_request_sock *treq, - unsigned short int family) + unsigned short int family, int l3index) { const struct tcphdr *th = tcp_hdr(skb); const struct tcp_ao_hdr *aoh; @@ -842,7 +856,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; - key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid); + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); if (!key) /* Key not found, continue without TCP-AO */ return; @@ -856,7 +870,7 @@ static enum skb_drop_reason tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, unsigned short int family, struct tcp_ao_info *info, const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, - u8 *traffic_key, u8 *phash, u32 sne) + u8 *traffic_key, u8 *phash, u32 sne, int l3index) { u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); const struct tcphdr *th = tcp_hdr(skb); @@ -867,7 +881,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); tcp_hash_fail("AO hash wrong length", family, skb, - "%u != %d", maclen, tcp_ao_maclen(key)); + "%u != %d L3index: %d", maclen, + tcp_ao_maclen(key), l3index); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -882,7 +897,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash mismatch", family, skb, ""); + tcp_hash_fail("AO hash mismatch", family, skb, + "L3index: %d", l3index); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -896,7 +912,7 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, - const struct tcp_ao_hdr *aoh) + int l3index, const struct tcp_ao_hdr *aoh) { const struct tcphdr *th = tcp_hdr(skb); u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ @@ -911,7 +927,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, if (!info) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); tcp_hash_fail("AO key not found", family, skb, - "keyid: %u", aoh->keyid); + "keyid: %u L3index: %d", aoh->keyid, l3index); return SKB_DROP_REASON_TCP_AOUNEXPECTED; } @@ -945,7 +961,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, /* Established socket, traffic key are cached */ traffic_key = rcv_other_key(key); err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, - traffic_key, phash, sne); + traffic_key, phash, sne, l3index); if (err) return err; current_key = READ_ONCE(info->current_key); @@ -966,7 +982,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, * - request sockets would race on those key pointers * - tcp_ao_del_cmd() allows async key removal */ - key = tcp_ao_inbound_lookup(family, sk, skb, 
-1, aoh->keyid); + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); if (!key) goto key_not_found; @@ -1006,7 +1022,7 @@ verify_hash: return SKB_DROP_REASON_NOT_SPECIFIED; tcp_ao_calc_key_skb(key, traffic_key, skb, sisn, disn, family); ret = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, - traffic_key, phash, sne); + traffic_key, phash, sne, l3index); kfree(traffic_key); return ret; @@ -1014,7 +1030,7 @@ key_not_found: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); atomic64_inc(&info->counters.key_not_found); tcp_hash_fail("Requested by the peer AO key id not found", - family, skb, ""); + family, skb, "L3index: %d", l3index); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } @@ -1042,7 +1058,7 @@ void tcp_ao_connect_init(struct sock *sk) struct tcp_ao_info *ao_info; union tcp_ao_addr *addr; struct tcp_ao_key *key; - int family; + int family, l3index; ao_info = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held(sk)); @@ -1059,9 +1075,11 @@ void tcp_ao_connect_init(struct sock *sk) #endif else return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + sk->sk_bound_dev_if); hlist_for_each_entry_rcu(key, &ao_info->head, node) { - if (!tcp_ao_key_cmp(key, addr, key->prefixlen, family, -1, -1)) + if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) continue; if (key == ao_info->current_key) @@ -1134,9 +1152,9 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, struct tcp_ao_key *key, *new_key, *first_key; struct tcp_ao_info *new_ao, *ao; struct hlist_node *key_head; + int l3index, ret = -ENOMEM; union tcp_ao_addr *addr; bool match = false; - int ret = -ENOMEM; ao = rcu_dereference(tcp_sk(sk)->ao_info); if (!ao) @@ -1164,9 +1182,11 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, ret = -EAFNOSUPPORT; goto free_ao; } + l3index = l3mdev_master_ifindex_by_index(sock_net(newsk), + newsk->sk_bound_dev_if); hlist_for_each_entry_rcu(key, &ao->head, node) { - if (tcp_ao_key_cmp(key, addr, key->prefixlen, family, -1, -1)) + if (tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) continue; new_key = tcp_ao_copy_key(newsk, key); @@ -1470,7 +1490,8 @@ static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) return ERR_PTR(-ESOCKTNOSUPPORT); } -#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_EXCLUDE_OPT) +#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_IFINDEX | TCP_AO_KEYF_EXCLUDE_OPT) +#define TCP_AO_GET_KEYF_VALID (TCP_AO_KEYF_IFINDEX) static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, struct tcp_ao_add *cmd) @@ -1534,8 +1555,8 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, union tcp_ao_addr *addr; struct tcp_ao_key *key; struct tcp_ao_add cmd; + int ret, l3index = 0; bool first = false; - int ret; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1565,9 +1586,46 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, return -EINVAL; } + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + /* For cmd.tcp_ifindex = 0 the key will apply to the default VRF */ + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX && cmd.ifindex) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), cmd.ifindex); + if (dev && netif_is_l3_master(dev)) + l3index = dev->ifindex; + rcu_read_unlock(); + + if (!dev || !l3index) + return -EINVAL; + + /* It's still possible to bind after adding keys or even + * re-bind to a different dev (with CAP_NET_RAW). 
+ * So, no reason to return error here, rather try to be + * nice and warn the user. + */ + if (bound_dev_if && bound_dev_if != cmd.ifindex) + net_warn_ratelimited("AO key ifindex %d != sk bound ifindex %d\n", + cmd.ifindex, bound_dev_if); + } + /* Don't allow keys for peers that have a matching TCP-MD5 key */ - if (tcp_md5_do_lookup_any_l3index(sk, addr, family)) - return -EKEYREJECTED; + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX) { + /* Non-_exact version of tcp_md5_do_lookup() will + * as well match keys that aren't bound to a specific VRF + * (that will make them match AO key with + * sysctl_tcp_l3dev_accept = 1 + */ + if (tcp_md5_do_lookup(sk, l3index, addr, family)) + return -EKEYREJECTED; + } else { + if (tcp_md5_do_lookup_any_l3index(sk, addr, family)) + return -EKEYREJECTED; + } ao_info = setsockopt_ao_info(sk); if (IS_ERR(ao_info)) @@ -1584,10 +1642,9 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, * > The IDs of MKTs MUST NOT overlap where their * > TCP connection identifiers overlap. */ - if (__tcp_ao_do_lookup(sk, addr, family, - cmd.prefix, -1, cmd.rcvid)) + if (__tcp_ao_do_lookup(sk, l3index, addr, family, cmd.prefix, -1, cmd.rcvid)) return -EEXIST; - if (__tcp_ao_do_lookup(sk, addr, family, + if (__tcp_ao_do_lookup(sk, l3index, addr, family, cmd.prefix, cmd.sndid, -1)) return -EEXIST; } @@ -1606,6 +1663,7 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, key->keyflags = cmd.keyflags; key->sndid = cmd.sndid; key->rcvid = cmd.rcvid; + key->l3index = l3index; atomic64_set(&key->pkt_good, 0); atomic64_set(&key->pkt_bad, 0); @@ -1694,17 +1752,17 @@ add_key: return err; } +#define TCP_AO_DEL_KEYF_ALL (TCP_AO_KEYF_IFINDEX) static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, sockptr_t optval, int optlen) { struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL; + int err, addr_len, l3index = 0; struct tcp_ao_info *ao_info; union tcp_ao_addr *addr; struct tcp_ao_del cmd; - int addr_len; __u8 prefix; u16 port; - int err; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1721,6 +1779,17 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, return -EINVAL; } + if (cmd.keyflags & ~TCP_AO_DEL_KEYF_ALL) + return -EINVAL; + + /* No sanity check for TCP_AO_KEYF_IFINDEX as if a VRF + * was destroyed, there still should be a way to delete keys, + * that were bound to that l3intf. So, fail late at lookup stage + * if there is no key for that ifindex. 
+ */ + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + ao_info = setsockopt_ao_info(sk); if (IS_ERR(ao_info)) return PTR_ERR(ao_info); @@ -1788,6 +1857,13 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, memcmp(addr, &key->addr, addr_len)) continue; + if ((cmd.keyflags & TCP_AO_KEYF_IFINDEX) != + (key->keyflags & TCP_AO_KEYF_IFINDEX)) + continue; + + if (key->l3index != l3index) + continue; + if (key == new_current || key == new_rnext) continue; @@ -1973,10 +2049,10 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, struct tcp_ao_key *key, *current_key; bool do_address_matching = true; union tcp_ao_addr *addr = NULL; + int err, l3index, user_len; unsigned int max_keys; /* maximum number of keys to copy to user */ size_t out_offset = 0; size_t bytes_to_write; /* number of bytes to write to user level */ - int err, user_len; u32 matched_keys; /* keys from ao_info matched so far */ int optlen_out; __be16 port = 0; @@ -1995,11 +2071,16 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, if (opt_in.pkt_good || opt_in.pkt_bad) return -EINVAL; + if (opt_in.keyflags & ~TCP_AO_GET_KEYF_VALID) + return -EINVAL; + if (opt_in.ifindex && !(opt_in.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; if (opt_in.reserved != 0) return -EINVAL; max_keys = opt_in.nkeys; + l3index = (opt_in.keyflags & TCP_AO_KEYF_IFINDEX) ? opt_in.ifindex : -1; if (opt_in.get_all || opt_in.is_current || opt_in.is_rnext) { if (opt_in.get_all && (opt_in.is_current || opt_in.is_rnext)) @@ -2101,7 +2182,7 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, continue; } - if (tcp_ao_key_cmp(key, addr, opt_in.prefix, + if (tcp_ao_key_cmp(key, l3index, addr, opt_in.prefix, opt_in.addr.ss_family, opt_in.sndid, opt_in.rcvid) != 0) continue; @@ -2134,6 +2215,7 @@ match: opt_out.nkeys = 0; opt_out.maclen = key->maclen; opt_out.keylen = key->keylen; + opt_out.ifindex = key->l3index; opt_out.pkt_good = atomic64_read(&key->pkt_good); opt_out.pkt_bad = atomic64_read(&key->pkt_bad); memcpy(&opt_out.key, key->key, key->keylen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 18c5595e3814..5f693bbd578d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1087,6 +1087,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk_used_ao(req)) { const union tcp_md5_addr *addr; const struct tcp_ao_hdr *aoh; + int l3index; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) @@ -1095,11 +1096,12 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, return; addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; - key.ao_key = tcp_ao_do_lookup(sk, addr, AF_INET, + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, aoh->rnext_keyid, -1); if (unlikely(!key.ao_key)) { /* Send ACK with any matching MKT for the peer */ - key.ao_key = tcp_ao_do_lookup(sk, addr, AF_INET, -1, -1); + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, -1, -1); /* Matching key disappeared (user removed the key?) * let the handshake timeout. 
*/ @@ -1493,6 +1495,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, const union tcp_md5_addr *addr; u8 prefixlen = 32; int l3index = 0; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -1505,6 +1508,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -1542,7 +1546,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET, false)) + if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ad7a8caa7b2a..500f6ed3b8cf 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -140,6 +140,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -214,7 +215,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_isn = cookie; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); - tcp_ao_syncookie(sk, skb, treq, AF_INET6); + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c index 8b04611c9078..3c09ac26206e 100644 --- a/net/ipv6/tcp_ao.c +++ b/net/ipv6/tcp_ao.c @@ -87,30 +87,29 @@ int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, htonl(tcp_rsk(req)->rcv_isn)); } -struct tcp_ao_key *tcp_v6_ao_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int sndid, int rcvid) -{ - return tcp_ao_do_lookup(sk, (union tcp_ao_addr *)addr, AF_INET6, - sndid, rcvid); -} - struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, struct sock *addr_sk, int sndid, int rcvid) { + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); struct in6_addr *addr = &addr_sk->sk_v6_daddr; - return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); } struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, struct request_sock *req, int sndid, int rcvid) { - struct in6_addr *addr = &inet_rsk(req)->ir_v6_rmt_addr; + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *addr = &ireq->ir_v6_rmt_addr; + int l3index; - return tcp_v6_ao_do_lookup(sk, addr, sndid, rcvid); + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); } int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5936294dba2..937a02c2e534 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -610,6 +610,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -622,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & 
TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -668,7 +670,8 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET, false)) + if (tcp_ao_required(sk, addr, AF_INET, + l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, @@ -680,7 +683,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ - if (tcp_ao_required(sk, addr, AF_INET6, false)) + if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, @@ -1220,10 +1223,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, return; if (!aoh) return; - key.ao_key = tcp_v6_ao_do_lookup(sk, addr, aoh->rnext_keyid, -1); + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, aoh->rnext_keyid, -1); if (unlikely(!key.ao_key)) { /* Send ACK with any matching MKT for the peer */ - key.ao_key = tcp_v6_ao_do_lookup(sk, addr, -1, -1); + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, -1, -1); /* Matching key disappeared (user removed the key?) * let the handshake timeout. */ -- cgit v1.2.3 From faadfaba5e018ca0f9595f17115ff48416b7b85e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 23 Oct 2023 20:22:14 +0100 Subject: net/tcp: Add TCP_AO_REPAIR Add TCP_AO_REPAIR setsockopt(), getsockopt(). They let a user to repair TCP-AO ISNs/SNEs. Also let the user hack around when (tp->repair) is on and add ao_info on a socket in any supported state. As SNEs now can be read/written at any moment, use WRITE_ONCE()/READ_ONCE() to set/read them. Signed-off-by: Dmitry Safonov Acked-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/tcp_ao.h | 14 ++++++++ include/uapi/linux/tcp.h | 8 +++++ net/ipv4/tcp.c | 24 +++++++++---- net/ipv4/tcp_ao.c | 90 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 125 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index edd6748b2cfa..a375a171ef3c 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -199,6 +199,8 @@ void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen); enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, unsigned short int family, const struct request_sock *req, int l3index, @@ -330,6 +332,18 @@ static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockpt { return -ENOPROTOOPT; } + +static inline int tcp_ao_get_repair(struct sock *sk, + sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_set_repair(struct sock *sk, + sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} #endif #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index be34d7c5c531..c07e9f90c084 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -133,6 +133,7 @@ enum { #define TCP_AO_DEL_KEY 39 /* Delete MKT */ #define TCP_AO_INFO 40 /* Set/list TCP-AO per-socket options */ #define TCP_AO_GET_KEYS 41 /* List MKT(s) */ +#define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 @@ -458,6 +459,13 @@ struct tcp_ao_getsockopt { /* getsockopt(TCP_AO_GET_KEYS) */ __u64 pkt_bad; /* out: segments that failed verification */ } __attribute__((aligned(8))); +struct tcp_ao_repair { /* {s,g}etsockopt(TCP_AO_REPAIR) */ + __be32 snt_isn; + __be32 rcv_isn; + __u32 snd_sne; + __u32 rcv_sne; +} __attribute__((aligned(8))); + /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */ #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ce33eee9d0f2..53bcc17c91e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3593,20 +3593,28 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, __tcp_sock_set_quickack(sk, val); break; + case TCP_AO_REPAIR: + err = tcp_ao_set_repair(sk, optval, optlen); + break; #ifdef CONFIG_TCP_AO case TCP_AO_ADD_KEY: case TCP_AO_DEL_KEY: case TCP_AO_INFO: { /* If this is the first TCP-AO setsockopt() on the socket, - * sk_state has to be LISTEN or CLOSE + * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR + * in any state. 
*/ - if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) || - rcu_dereference_protected(tcp_sk(sk)->ao_info, + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + goto ao_parse; + if (rcu_dereference_protected(tcp_sk(sk)->ao_info, lockdep_sock_is_held(sk))) - err = tp->af_specific->ao_parse(sk, optname, optval, - optlen); - else - err = -EISCONN; + goto ao_parse; + if (tp->repair) + goto ao_parse; + err = -EISCONN; + break; +ao_parse: + err = tp->af_specific->ao_parse(sk, optname, optval, optlen); break; } #endif @@ -4284,6 +4292,8 @@ zerocopy_rcv_out: return err; } #endif + case TCP_AO_REPAIR: + return tcp_ao_get_repair(sk, optval, optlen); case TCP_AO_GET_KEYS: case TCP_AO_INFO: { int err; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index b5ac3e73e1da..6a845e906a1d 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1490,6 +1490,16 @@ static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) return ERR_PTR(-ESOCKTNOSUPPORT); } +static struct tcp_ao_info *getsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) + return rcu_dereference(tcp_sk(sk)->ao_info); + else if (sk->sk_state == TCP_TIME_WAIT) + return rcu_dereference(tcp_twsk(sk)->ao_info); + + return ERR_PTR(-ESOCKTNOSUPPORT); +} + #define TCP_AO_KEYF_ALL (TCP_AO_KEYF_IFINDEX | TCP_AO_KEYF_EXCLUDE_OPT) #define TCP_AO_GET_KEYF_VALID (TCP_AO_KEYF_IFINDEX) @@ -1671,11 +1681,13 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, if (ret < 0) goto err_free_sock; - /* Change this condition if we allow adding keys in states - * like close_wait, syn_sent or fin_wait... - */ - if (sk->sk_state == TCP_ESTABLISHED) + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { tcp_ao_cache_traffic_keys(sk, ao_info, key); + if (first) { + ao_info->current_key = key; + ao_info->rnext_key = key; + } + } tcp_ao_link_mkt(ao_info, key); if (first) { @@ -1926,6 +1938,8 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, if (IS_ERR(ao_info)) return PTR_ERR(ao_info); if (!ao_info) { + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; ao_info = tcp_ao_alloc_info(GFP_KERNEL); if (!ao_info) return -ENOMEM; @@ -2308,3 +2322,71 @@ int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) return 0; } +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair cmd; + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (!tp->repair) + return -EPERM; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + WRITE_ONCE(ao->lisn, cmd.snt_isn); + WRITE_ONCE(ao->risn, cmd.rcv_isn); + WRITE_ONCE(ao->snd_sne, cmd.snd_sne); + WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne); + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); + + return 0; +} + +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair opt; + struct tcp_ao_info *ao; + int len; + + if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + rcu_read_lock(); + ao = getsockopt_ao_info(sk); + if (IS_ERR_OR_NULL(ao)) { + rcu_read_unlock(); + return ao ? 
PTR_ERR(ao) : -ENOENT; + } + + opt.snt_isn = ao->lisn; + opt.rcv_isn = ao->risn; + opt.snd_sne = READ_ONCE(ao->snd_sne); + opt.rcv_sne = READ_ONCE(ao->rcv_sne); + rcu_read_unlock(); + + if (copy_to_sockptr(optval, &opt, min_t(int, len, sizeof(opt)))) + return -EFAULT; + return 0; +} -- cgit v1.2.3 From b9109b5b77f0cb437fe9fd5575e29e944c0b2580 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:08 +0300 Subject: bridge: mcast: Dump MDB entries even when snooping is disabled Currently, the bridge driver does not dump MDB entries when multicast snooping is disabled although the entries are present in the kernel: # bridge mdb add dev br0 port swp1 grp 239.1.1.1 permanent # bridge mdb show dev br0 dev br0 port swp1 grp 239.1.1.1 permanent dev br0 port br0 grp ff02::6a temp dev br0 port br0 grp ff02::1:ff9d:e61b temp # ip link set dev br0 type bridge mcast_snooping 0 # bridge mdb show dev br0 # ip link set dev br0 type bridge mcast_snooping 1 # bridge mdb show dev br0 dev br0 port swp1 grp 239.1.1.1 permanent dev br0 port br0 grp ff02::6a temp dev br0 port br0 grp ff02::1:ff9d:e61b temp This behavior differs from other netlink dump interfaces that dump entries regardless if they are used or not. For example, VLANs are dumped even when VLAN filtering is disabled: # ip link set dev br0 type bridge vlan_filtering 0 # bridge vlan show dev swp1 port vlan-id swp1 1 PVID Egress Untagged Remove the check and always dump MDB entries: # bridge mdb add dev br0 port swp1 grp 239.1.1.1 permanent # bridge mdb show dev br0 dev br0 port swp1 grp 239.1.1.1 permanent dev br0 port br0 grp ff02::6a temp dev br0 port br0 grp ff02::1:ffeb:1a4d temp # ip link set dev br0 type bridge mcast_snooping 0 # bridge mdb show dev br0 dev br0 port swp1 grp 239.1.1.1 permanent dev br0 port br0 grp ff02::6a temp dev br0 port br0 grp ff02::1:ffeb:1a4d temp # ip link set dev br0 type bridge mcast_snooping 1 # bridge mdb show dev br0 dev br0 port swp1 grp 239.1.1.1 permanent dev br0 port br0 grp ff02::6a temp dev br0 port br0 grp ff02::1:ffeb:1a4d temp Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 7305f5f8215c..fb58bb1b60e8 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,9 +323,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_bridge_mdb_entry *mp; struct nlattr *nest, *nest2; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) - return 0; - nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) return -EMSGSIZE; -- cgit v1.2.3 From 1b6d993509c13d180b2a9fbfe0ebc48e344348df Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:09 +0300 Subject: bridge: mcast: Account for missing attributes The 'MDBA_MDB' and 'MDBA_MDB_ENTRY' nest attributes are not accounted for when calculating the size of MDB notifications. Add them along with comments for existing attributes. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_mdb.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index fb58bb1b60e8..08de94bffc12 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -452,11 +452,18 @@ cancel: static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) { - size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + - nla_total_size(sizeof(struct br_mdb_entry)) + - nla_total_size(sizeof(u32)); struct net_bridge_group_src *ent; - size_t addr_size = 0; + size_t nlmsg_size, addr_size = 0; + + nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY_INFO */ + nla_total_size(sizeof(struct br_mdb_entry)) + + /* MDBA_MDB_EATTR_TIMER */ + nla_total_size(sizeof(u32)); if (!pg) goto out; -- cgit v1.2.3 From 62ef9cba98a2e401b1e8b5dedcc56b735031e744 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:10 +0300 Subject: bridge: mcast: Factor out a helper for PG entry size calculation Currently, netlink notifications are sent for individual port group entries and not for the entire MDB entry itself. Subsequent patches are going to add MDB get support which will require the bridge driver to reply with an entire MDB entry. Therefore, as a preparation, factor out an helper to calculate the size of an individual port group entry. When determining the size of the reply this helper will be invoked for each port group entry in the MDB entry. No functional changes intended. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 08de94bffc12..42983f6a0abd 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -450,18 +450,13 @@ cancel: return -EMSGSIZE; } -static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) +static size_t rtnl_mdb_nlmsg_pg_size(const struct net_bridge_port_group *pg) { struct net_bridge_group_src *ent; size_t nlmsg_size, addr_size = 0; - nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + - /* MDBA_MDB */ - nla_total_size(0) + - /* MDBA_MDB_ENTRY */ - nla_total_size(0) + /* MDBA_MDB_ENTRY_INFO */ - nla_total_size(sizeof(struct br_mdb_entry)) + + nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) + /* MDBA_MDB_EATTR_TIMER */ nla_total_size(sizeof(u32)); @@ -511,6 +506,17 @@ out: return nlmsg_size; } +static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0) + + /* Port group entry */ + rtnl_mdb_nlmsg_pg_size(pg); +} + void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, -- cgit v1.2.3 From 6d0259dd6c533e4ccc41b40075c1bdfd0f1efbd7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:11 +0300 Subject: bridge: mcast: Rename MDB entry get function The current name is going to conflict with the upcoming net device operation for the MDB get operation. Rename the function to br_mdb_entry_skb_get(). No functional changes intended. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 2 +- net/bridge/br_input.c | 2 +- net/bridge/br_multicast.c | 5 +++-- net/bridge/br_private.h | 10 ++++++---- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 9a5ea06236bd..d624710b384a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -92,7 +92,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } - mdst = br_mdb_get(brmctx, skb, vid); + mdst = br_mdb_entry_skb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) br_multicast_flood(mdst, skb, brmctx, false, true); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index c729528b5e85..f21097e73482 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -175,7 +175,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb switch (pkt_type) { case BR_PKT_MULTICAST: - mdst = br_mdb_get(brmctx, skb, vid); + mdst = br_mdb_entry_skb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) { if ((mdst && mdst->host_joined) || diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 96d1fc78dd39..d7d021af1029 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -145,8 +145,9 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br, } #endif -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, - struct sk_buff *skb, u16 vid) +struct net_bridge_mdb_entry * +br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, + u16 vid) { struct net_bridge *br = brmctx->br; struct br_ip ip; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 27a7a06660f3..40bbcd9f63b5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -955,8 +955,9 @@ int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid); -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, - struct sk_buff *skb, u16 vid); +struct net_bridge_mdb_entry * +br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, + u16 vid); int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); @@ -1345,8 +1346,9 @@ static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, return 0; } -static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, - struct sk_buff *skb, u16 vid) +static inline struct net_bridge_mdb_entry * +br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, + u16 vid) { return NULL; } -- cgit v1.2.3 From 68b380a395a72ace8b77463f6cd2d7fd6dcb5a1b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:16 +0300 Subject: bridge: mcast: Add MDB get support Implement support for MDB get operation by looking up a matching MDB entry, allocating the skb according to the entry's size and then filling in the response. The operation is performed under the bridge multicast lock to ensure that the entry does not change between the time the reply size is determined and when the reply is filled in. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 1 + net/bridge/br_mdb.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 9 +++ 3 files changed, 168 insertions(+) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d624710b384a..8f40de3af154 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -472,6 +472,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_mdb_add = br_mdb_add, .ndo_mdb_del = br_mdb_del, .ndo_mdb_dump = br_mdb_dump, + .ndo_mdb_get = br_mdb_get, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, .ndo_bridge_dellink = br_dellink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 42983f6a0abd..8cc526067bc2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1411,3 +1411,161 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[], br_mdb_config_fini(&cfg); return err; } + +static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), +}; + +static int br_mdb_get_parse(struct net_device *dev, struct nlattr *tb[], + struct br_ip *group, struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + if (!tb[MDBA_GET_ENTRY_ATTRS]) { + __mdb_entry_to_br_ip(entry, group, NULL); + return 0; + } + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_GET_ENTRY_ATTRS], br_mdbe_attrs_get_pol, + extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_SOURCE] && + !is_valid_mdb_source(mdbe_attrs[MDBE_ATTR_SOURCE], + entry->addr.proto, extack)) + return -EINVAL; + + __mdb_entry_to_br_ip(entry, group, mdbe_attrs); + + return 0; +} + +static struct sk_buff * +br_mdb_get_reply_alloc(const struct net_bridge_mdb_entry *mp) +{ + struct net_bridge_port_group *pg; + size_t nlmsg_size; + + nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + /* MDBA_MDB */ + nla_total_size(0) + + /* MDBA_MDB_ENTRY */ + nla_total_size(0); + + if (mp->host_joined) + nlmsg_size += rtnl_mdb_nlmsg_pg_size(NULL); + + for (pg = mlock_dereference(mp->ports, mp->br); pg; + pg = mlock_dereference(pg->next, mp->br)) + nlmsg_size += rtnl_mdb_nlmsg_pg_size(pg); + + return nlmsg_new(nlmsg_size, GFP_ATOMIC); +} + +static int br_mdb_get_reply_fill(struct sk_buff *skb, + struct net_bridge_mdb_entry *mp, u32 portid, + u32 seq) +{ + struct nlattr *mdb_nest, *mdb_entry_nest; + struct net_bridge_port_group *pg; + struct br_port_msg *bpm; + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = mp->br->dev->ifindex; + mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB); + if (!mdb_nest) { + err = -EMSGSIZE; + goto cancel; + } + mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); + if (!mdb_entry_nest) { + err = -EMSGSIZE; + goto cancel; + } + + if (mp->host_joined) { + err = __mdb_fill_info(skb, mp, NULL); + if (err) + goto cancel; + } + + for (pg = mlock_dereference(mp->ports, mp->br); pg; + pg = mlock_dereference(pg->next, mp->br)) { + err = __mdb_fill_info(skb, mp, pg); + if (err) + goto cancel; + } + + nla_nest_end(skb, mdb_entry_nest); + nla_nest_end(skb, mdb_nest); + nlmsg_end(skb, nlh); + + return 0; + +cancel: + nlmsg_cancel(skb, nlh); + return err; +} + +int br_mdb_get(struct net_device *dev, 
struct nlattr *tb[], u32 portid, u32 seq, + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_entry *mp; + struct sk_buff *skb; + struct br_ip group; + int err; + + err = br_mdb_get_parse(dev, tb, &group, extack); + if (err) + return err; + + /* Hold the multicast lock to ensure that the MDB entry does not change + * between the time the reply size is determined and when the reply is + * filled in. + */ + spin_lock_bh(&br->multicast_lock); + + mp = br_mdb_ip_get(br, &group); + if (!mp) { + NL_SET_ERR_MSG_MOD(extack, "MDB entry not found"); + err = -ENOENT; + goto unlock; + } + + skb = br_mdb_get_reply_alloc(mp); + if (!skb) { + err = -ENOMEM; + goto unlock; + } + + err = br_mdb_get_reply_fill(skb, mp, portid, seq); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply"); + goto free; + } + + spin_unlock_bh(&br->multicast_lock); + + return rtnl_unicast(skb, dev_net(dev), portid); + +free: + kfree_skb(skb); +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 40bbcd9f63b5..6b7f36769d03 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1022,6 +1022,8 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); +int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, + struct netlink_ext_ack *extack); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); @@ -1432,6 +1434,13 @@ static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, return 0; } +static inline int br_mdb_get(struct net_device *dev, struct nlattr *tb[], + u32 portid, u32 seq, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_mdb_hash_init(struct net_bridge *br) { return 0; -- cgit v1.2.3 From ddd17a54e692bef1b646febf5242db10982e1965 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 25 Oct 2023 15:30:18 +0300 Subject: rtnetlink: Add MDB get support Now that both the bridge and VXLAN drivers implement the MDB get net device operation, expose the functionality to user space by registering a handler for RTM_GETMDB messages. Derive the net device from the ifindex specified in the ancillary header and invoke its MDB get NDO. Note that unlike other get handlers, the allocation of the skb containing the response is not performed in the common rtnetlink code as the size is variable and needs to be determined by the respective driver. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f2753fd58881..e8431c6c8490 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6219,6 +6219,93 @@ out: return skb->len; } +static int rtnl_validate_mdb_entry_get(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex) { + NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified"); + return -EINVAL; + } + + if (entry->state) { + NL_SET_ERR_MSG(extack, "Entry state cannot be specified"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be specified"); + return -EINVAL; + } + + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (entry->addr.proto != htons(ETH_P_IP) && + entry->addr.proto != htons(ETH_P_IPV6) && + entry->addr.proto != 0) { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = { + [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_get, + sizeof(struct br_mdb_entry)), + [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1]; + struct net *net = sock_net(in_skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb, + MDBA_GET_ENTRY_MAX, mdba_get_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_GET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_GET_ENTRY attribute"); + return -EINVAL; + } + + if (!dev->netdev_ops->ndo_mdb_get) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_get(dev, tb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, extack); +} + static int rtnl_validate_mdb_entry(const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -6595,7 +6682,7 @@ void __init rtnetlink_init(void) 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); } -- cgit v1.2.3 From 6808918343a8b4b6970ba52ba2d1d511a0976748 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 27 Oct 2023 13:05:49 +0300 Subject: net: bridge: fill in MODULE_DESCRIPTION() Fill in bridge's module description. Suggested-by: Jakub Kicinski Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index a6e94ceb7c9a..ac19b797dbec 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -477,3 +477,4 @@ module_exit(br_deinit) MODULE_LICENSE("GPL"); MODULE_VERSION(BR_VERSION); MODULE_ALIAS_RTNL_LINK("bridge"); +MODULE_DESCRIPTION("Ethernet bridge driver"); -- cgit v1.2.3 From 79fa29570bd340d5cb6229f047f2b9127dbff32c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Oct 2023 19:29:16 -0700 Subject: net: selftests: use ethtool_sprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During a W=1 build GCC 13.2 says: net/core/selftests.c: In function ‘net_selftest_get_strings’: net/core/selftests.c:404:52: error: ‘%s’ directive output may be truncated writing up to 279 bytes into a region of size 28 [-Werror=format-truncation=] 404 | snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, | ^~ net/core/selftests.c:404:17: note: ‘snprintf’ output between 5 and 284 bytes into a destination of size 32 404 | snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 405 | net_selftests[i].name); | ~~~~~~~~~~~~~~~~~~~~~~ avoid it by using ethtool_sprintf(). Reviewed-by: Oleksij Rempel Tested-by: Oleksij Rempel Link: https://lore.kernel.org/r/20231026022916.566661-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/selftests.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/selftests.c b/net/core/selftests.c index acb1ee97bbd3..94fe3146a959 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -397,14 +397,11 @@ EXPORT_SYMBOL_GPL(net_selftest_get_count); void net_selftest_get_strings(u8 *data) { - u8 *p = data; int i; - for (i = 0; i < net_selftest_get_count(); i++) { - snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, - net_selftests[i].name); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < net_selftest_get_count(); i++) + ethtool_sprintf(&data, "%2d. %s", i + 1, + net_selftests[i].name); } EXPORT_SYMBOL_GPL(net_selftest_get_strings); -- cgit v1.2.3 From 84c531f54ad9a124a924c9505d74e33d16965146 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:04 -0700 Subject: mptcp: userspace pm send RM_ADDR for ID 0 This patch adds the ability to send RM_ADDR for local ID 0. Check whether id 0 address is removed, if not, put id 0 into a removing list, pass it to mptcp_pm_remove_addr() to remove id 0 address. There is no reason not to allow the userspace to remove the initial address (ID 0). This special case was not taken into account not letting the userspace to delete all addresses as announced. 
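For illustration, a hedged sketch of how a userspace path manager could exercise this path, assuming libnl-3 and the MPTCP_PM_* names from the uapi header; error handling and waiting for the netlink ack are omitted (not part of the patch):

#include <linux/mptcp.h>		/* MPTCP_PM_NAME, MPTCP_PM_CMD_REMOVE, MPTCP_PM_ATTR_* (assumed) */
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

/* Request RM_ADDR for the initial address (local ID 0) of the MPTCP
 * connection identified by 'token'.
 */
static int remove_initial_address(unsigned int token)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (!sk || genl_connect(sk) < 0)
		return -1;
	family = genl_ctrl_resolve(sk, MPTCP_PM_NAME);

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    MPTCP_PM_CMD_REMOVE, MPTCP_PM_VER);
	nla_put_u32(msg, MPTCP_PM_ATTR_TOKEN, token);
	nla_put_u8(msg, MPTCP_PM_ATTR_LOC_ID, 0);	/* ID 0: the initial address */

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0 ? err : 0;
}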
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-3-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 0f92e5b13a8a..25fa37ac3620 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -208,6 +208,40 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (subflow->local_id == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -239,6 +273,11 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { -- cgit v1.2.3 From 74cbb0c65b2963c1f1b51e2426cf0774ed828bc0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:05 -0700 Subject: mptcp: drop useless ssk in pm_subflow_check_next The code using 'ssk' parameter of mptcp_pm_subflow_check_next() has been dropped in commit "95d686517884 (mptcp: fix subflow accounting on close)". So drop this useless parameter ssk. 
Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-4-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index d8da5374d9e1..4ae19113b8eb 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -184,7 +184,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk) spin_unlock_bh(&pm->lock); } -void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, +void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow) { struct mptcp_pm_data *pm = &msk->pm; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1dacc072dcca..a29116eda30a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2490,7 +2490,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* subflow aborted before reaching the fully_established status * attempt the creation of the next subflow */ - mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow); + mptcp_pm_subflow_check_next(mptcp_sk(sk), subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9092fcf18798..a5322074353b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -888,7 +888,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); -void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, +void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow); void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); -- cgit v1.2.3 From 83d580ddbe1b3297c346b24070c23fcf6698393c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:06 -0700 Subject: mptcp: use mptcp_check_fallback helper Use __mptcp_check_fallback() helper defined in net/mptcp/protocol.h, instead of open-coding it in both __mptcp_do_fallback() and mptcp_diag_fill_info(). 
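For reference, the helper being consolidated on presumably amounts to the open-coded test it replaces (a sketch of the protocol.h definition, not a verbatim copy):

	static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
	{
		return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
	}
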
Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-5-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 2 +- net/mptcp/sockopt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a5322074353b..fe6f2d399ee8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1065,7 +1065,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk) static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) { + if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)", msk); return; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 574e221bb765..77f5e8932abf 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -916,7 +916,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) mptcp_pm_get_local_addr_max(msk); } - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) + if (__mptcp_check_fallback(msk)) flags |= MPTCP_INFO_FLAG_FALLBACK; if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; -- cgit v1.2.3 From a16c054b527bbaed611fef03e8b19e111a1769ef Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:07 -0700 Subject: mptcp: use mptcp_get_ext helper Use mptcp_get_ext() helper defined in protocol.h instead of open-coding it in mptcp_sendmsg_frag(). Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-6-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a29116eda30a..a0b8356cd8c5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1267,7 +1267,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * queue management operation, to avoid breaking the ext <-> * SSN association set here */ - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + mpext = mptcp_get_ext(skb); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; goto alloc_skb; @@ -1289,7 +1289,7 @@ alloc_skb: i = skb_shinfo(skb)->nr_frags; reuse_skb = false; - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + mpext = mptcp_get_ext(skb); } /* Zero window and all data acked? Probe. */ -- cgit v1.2.3 From a6c85fc61c088c7ef43ba81e80b48c263a80602a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:08 -0700 Subject: mptcp: move sk assignment statement ahead If we move the sk assignment statement ahead in mptcp_nl_cmd_sf_create() or mptcp_nl_cmd_sf_destroy(), right after the msk null-check statements, sk can be used after the create_err or destroy_err labels instead of open-coding it again. 
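The resulting pattern, sketched in condensed form from the diff below (locals and error labels abbreviated):

	msk = mptcp_token_get_sock(net, token_val);
	if (!msk)
		return err;

	sk = (struct sock *)msk;	/* assigned right after the NULL check */
	...
create_err:
	sock_put(sk);			/* error path no longer open-codes the cast */
	return err;
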
Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-7-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 25fa37ac3620..7bb2b29e5b96 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -335,6 +335,8 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto create_err; @@ -352,8 +354,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - sk = (struct sock *)msk; - if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; @@ -381,7 +381,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&msk->pm.lock); create_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } @@ -458,6 +458,8 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto destroy_err; @@ -487,7 +489,6 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } - sk = (struct sock *)msk; lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { @@ -507,7 +508,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info release_sock(sk); destroy_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } -- cgit v1.2.3 From 14cb0e0bf39bd10429ba14e9e2f905f1144226fc Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:09 -0700 Subject: mptcp: define more local variables sk '(struct sock *)msk' is used several times in mptcp_nl_cmd_announce(), mptcp_nl_cmd_remove() or mptcp_userspace_pm_set_flags() in pm_userspace.c, it's worth adding a local variable sk to point it. 
Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-8-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 7bb2b29e5b96..5c01b9bc619a 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -152,6 +152,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; int err = -EINVAL; + struct sock *sk; u32 token_val; if (!addr || !token) { @@ -167,6 +168,8 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto announce_err; @@ -190,7 +193,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - lock_sock((struct sock *)msk); + lock_sock(sk); spin_lock_bh(&msk->pm.lock); if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { @@ -200,11 +203,11 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) } spin_unlock_bh(&msk->pm.lock); - release_sock((struct sock *)msk); + release_sock(sk); err = 0; announce_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } @@ -251,6 +254,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) struct mptcp_sock *msk; LIST_HEAD(free_list); int err = -EINVAL; + struct sock *sk; u32 token_val; u8 id_val; @@ -268,6 +272,8 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto remove_err; @@ -278,7 +284,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) goto remove_err; } - lock_sock((struct sock *)msk); + lock_sock(sk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { if (entry->addr.id == id_val) { @@ -289,7 +295,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); - release_sock((struct sock *)msk); + release_sock(sk); goto remove_err; } @@ -297,15 +303,15 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) mptcp_pm_remove_addrs(msk, &free_list); - release_sock((struct sock *)msk); + release_sock(sk); list_for_each_entry_safe(match, entry, &free_list, list) { - sock_kfree_s((struct sock *)msk, match, sizeof(*match)); + sock_kfree_s(sk, match, sizeof(*match)); } err = 0; remove_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } @@ -518,6 +524,7 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, { struct mptcp_sock *msk; int ret = -EINVAL; + struct sock *sk; u32 token_val; token_val = nla_get_u32(token); @@ -526,6 +533,8 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, if (!msk) return ret; + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) goto set_flags_err; @@ -533,11 +542,11 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, rem->addr.family == AF_UNSPEC) goto set_flags_err; - lock_sock((struct sock *)msk); + lock_sock(sk); ret = 
mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup); - release_sock((struct sock *)msk); + release_sock(sk); set_flags_err: - sock_put((struct sock *)msk); + sock_put(sk); return ret; } -- cgit v1.2.3 From 3a04927f8d4b7a4f008f04af41e31173002eb1ea Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 26 Oct 2023 14:23:05 -0700 Subject: af_unix: Remove module remnants. Since commit 97154bcf4d1b ("af_unix: Kconfig: make CONFIG_UNIX bool"), af_unix.c is no longer built as module. Let's remove unnecessary #if condition, exitcall, and module macros. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20231026212305.45545-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e10d07c76044..45506a95b25f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3344,7 +3344,7 @@ static const struct seq_operations unix_seq_ops = { .show = unix_seq_show, }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +#ifdef CONFIG_BPF_SYSCALL struct bpf_unix_iter_state { struct seq_net_private p; unsigned int cur_sk; @@ -3606,7 +3606,7 @@ static struct pernet_operations unix_net_ops = { .exit = unix_net_exit, }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) @@ -3706,7 +3706,7 @@ static int __init af_unix_init(void) register_pernet_subsys(&unix_net_ops); unix_bpf_build_proto(); -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_register(); #endif @@ -3714,20 +3714,5 @@ out: return rc; } -static void __exit af_unix_exit(void) -{ - sock_unregister(PF_UNIX); - proto_unregister(&unix_dgram_proto); - proto_unregister(&unix_stream_proto); - unregister_pernet_subsys(&unix_net_ops); -} - -/* Earlier than device_initcall() so that other drivers invoking - request_module() don't end up in a loop when modprobe tries - to use a UNIX socket. But later than subsys_initcall() because - we depend on stuff initialised there */ +/* Later than subsys_initcall() because we depend on stuff initialised there */ fs_initcall(af_unix_init); -module_exit(af_unix_exit); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_UNIX); -- cgit v1.2.3 From 06497763c8f15d08c0e356e651a61f2930a8987c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 27 Oct 2023 11:24:24 -0700 Subject: net: bpf: Use sockopt_lock_sock() in ip_sock_set_tos() With latest sync from net-next tree, bpf-next has a bpf selftest failure: [root@arch-fb-vm1 bpf]# ./test_progs -t setget_sockopt ... 
[ 76.194349] ============================================ [ 76.194682] WARNING: possible recursive locking detected [ 76.195039] 6.6.0-rc7-g37884503df08-dirty #67 Tainted: G W OE [ 76.195518] -------------------------------------------- [ 76.195852] new_name/154 is trying to acquire lock: [ 76.196159] ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: ip_sock_set_tos+0x19/0x30 [ 76.196669] [ 76.196669] but task is already holding lock: [ 76.197028] ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_listen+0x21/0x70 [ 76.197517] [ 76.197517] other info that might help us debug this: [ 76.197919] Possible unsafe locking scenario: [ 76.197919] [ 76.198287] CPU0 [ 76.198444] ---- [ 76.198600] lock(sk_lock-AF_INET); [ 76.198831] lock(sk_lock-AF_INET); [ 76.199062] [ 76.199062] *** DEADLOCK *** [ 76.199062] [ 76.199420] May be due to missing lock nesting notation [ 76.199420] [ 76.199879] 2 locks held by new_name/154: [ 76.200131] #0: ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_listen+0x21/0x70 [ 76.200644] #1: ffffffff90f96a40 (rcu_read_lock){....}-{1:2}, at: __cgroup_bpf_run_filter_sock_ops+0x55/0x290 [ 76.201268] [ 76.201268] stack backtrace: [ 76.201538] CPU: 4 PID: 154 Comm: new_name Tainted: G W OE 6.6.0-rc7-g37884503df08-dirty #67 [ 76.202134] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 76.202699] Call Trace: [ 76.202858] [ 76.203002] dump_stack_lvl+0x4b/0x80 [ 76.203239] __lock_acquire+0x740/0x1ec0 [ 76.203503] lock_acquire+0xc1/0x2a0 [ 76.203766] ? ip_sock_set_tos+0x19/0x30 [ 76.204050] ? sk_stream_write_space+0x12a/0x230 [ 76.204389] ? lock_release+0xbe/0x260 [ 76.204661] lock_sock_nested+0x32/0x80 [ 76.204942] ? ip_sock_set_tos+0x19/0x30 [ 76.205208] ip_sock_set_tos+0x19/0x30 [ 76.205452] do_ip_setsockopt+0x4b3/0x1580 [ 76.205719] __bpf_setsockopt+0x62/0xa0 [ 76.205963] bpf_sock_ops_setsockopt+0x11/0x20 [ 76.206247] bpf_prog_630217292049c96e_bpf_test_sockopt_int+0xbc/0x123 [ 76.206660] bpf_prog_493685a3bae00bbd_bpf_test_ip_sockopt+0x49/0x4b [ 76.207055] bpf_prog_b0bcd27f269aeea0_skops_sockopt+0x44c/0xec7 [ 76.207437] __cgroup_bpf_run_filter_sock_ops+0xda/0x290 [ 76.207829] __inet_listen_sk+0x108/0x1b0 [ 76.208122] inet_listen+0x48/0x70 [ 76.208373] __sys_listen+0x74/0xb0 [ 76.208630] __x64_sys_listen+0x16/0x20 [ 76.208911] do_syscall_64+0x3f/0x90 [ 76.209174] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 ... Both ip_sock_set_tos() and inet_listen() calls lock_sock(sk) which caused a dead lock. To fix the issue, use sockopt_lock_sock() in ip_sock_set_tos() instead. sockopt_lock_sock() will avoid lock_sock() if it is in bpf context. Fixes: 878d951c6712 ("inet: lock the socket in ip_sock_set_tos()") Suggested-by: Martin KaFai Lau Signed-off-by: Yonghong Song Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231027182424.1444845-1-yonghong.song@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv4/ip_sockglue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9c68b6b74d9f..2efc53526a38 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -602,9 +602,9 @@ void __ip_sock_set_tos(struct sock *sk, int val) void ip_sock_set_tos(struct sock *sk, int val) { - lock_sock(sk); + sockopt_lock_sock(sk); __ip_sock_set_tos(sk, val); - release_sock(sk); + sockopt_release_sock(sk); } EXPORT_SYMBOL(ip_sock_set_tos); -- cgit v1.2.3
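The substituted helpers break the recursion because a setsockopt path entered from a BPF program already holds the socket lock, so sockopt_lock_sock()/sockopt_release_sock() skip the locking in that case. Roughly (a sketch of the include/net/sock.h helpers; see the header for the exact definitions):

	static inline void sockopt_lock_sock(struct sock *sk)
	{
		/* BPF callers (has_current_bpf_ctx()) already own the lock */
		if (has_current_bpf_ctx())
			return;
		lock_sock(sk);
	}

	static inline void sockopt_release_sock(struct sock *sk)
	{
		if (has_current_bpf_ctx())
			return;
		release_sock(sk);
	}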