From 9bcb084f0b648d032efdbb8226fe33663f8fca79 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen
Date: Tue, 10 Dec 2019 15:34:17 +0100
Subject: mac80211: Always show airtime debugfs file when TXQs are enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AQL statistics are displayed in the 'airtime' station debugfs file, but that file was only shown if a driver has enabled the airtime fairness feature flag. Since AQL can be enabled without airtime fairness, let's expose the airtime file whenever TXQs are enabled, so the AQL data can be read.

Signed-off-by: Toke Høiland-Jørgensen
Link: https://lore.kernel.org/r/20191210143417.142964-1-toke@redhat.com
Signed-off-by: Johannes Berg
--- net/mac80211/debugfs_sta.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net')
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b3c9001d1f43..c190c29fe0dd 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -989,12 +989,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); - if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue) { DEBUGFS_ADD(aqm); - - if (wiphy_ext_feature_isset(local->hw.wiphy, - NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) DEBUGFS_ADD(airtime); + } debugfs_create_xul("driver_buffered_tids", 0400, sta->debugfs_dir, &sta->driver_buffered_tids);
-- cgit v1.2.3

From 50ff477a8639fa1fbbeecb5a6f2f8b6c5557ecec Mon Sep 17 00:00:00 2001
From: John Crispin
Date: Mon, 25 Nov 2019 11:04:37 +0100
Subject: mac80211: add 802.11 encapsulation offloading support

This patch adds a new transmit path for hardware that supports 802.11 encapsulation offloading. In those cases 802.3 frames get passed directly to the driver, allowing the hardware to handle the encapsulation. Some features such as monitor mode and TKIP would break when encapsulation offloading is enabled. If any of these get enabled, the code will always fall back to the normal software encapsulation data path. The patch defines a secondary netdev_ops struct that the device gets assigned if 802.11 encap support is available and enabled. The driver needs to enable the support on a per-vif basis if it finds that all prerequisites are met.
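For illustration only (not part of the patch): a driver that has verified its hardware can do the encapsulation might opt a vif into the new path and report completions roughly as below. The drv_example_*() functions are hypothetical placeholders; ieee80211_set_hw_80211_encap() and ieee80211_tx_status_8023() are the helpers introduced by this patch.

#include <net/mac80211.h>

/* Hypothetical driver sketch: request 802.11 encap offload for a vif once
 * the prerequisites are known to be met. mac80211 returns the mode actually
 * in effect and may refuse, e.g. when a TKIP key is installed, when
 * fragmentation is configured without hardware support, or when a monitor
 * interface coexists.
 */
static void drv_example_enable_encap(struct ieee80211_vif *vif)
{
        if (!ieee80211_set_hw_80211_encap(vif, true))
                pr_debug("encap offload refused, using the normal TX path\n");
}

/* Hypothetical TX-completion handler for a frame sent on the offloaded
 * (802.3) path: status is reported with the new ieee80211_tx_status_8023()
 * helper instead of ieee80211_tx_status().
 */
static void drv_example_tx_done(struct ieee80211_hw *hw,
                                struct ieee80211_vif *vif,
                                struct sk_buff *skb, bool acked)
{
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);

        if (acked)
                info->flags |= IEEE80211_TX_STAT_ACK;

        ieee80211_tx_status_8023(hw, vif, skb);
}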
Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191125100438.16539-1-john@phrozen.org [reword comments, remove SUPPORTS_80211_ENCAP HW flag, minor cleanups] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 32 +++++++++ net/mac80211/ieee80211_i.h | 9 +++ net/mac80211/iface.c | 67 ++++++++++++++++++ net/mac80211/key.c | 11 +++ net/mac80211/status.c | 71 +++++++++++++++++++ net/mac80211/tx.c | 170 +++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 356 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index aa145808e57a..682fd2f4431b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -826,6 +826,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_AMSDU = BIT(3), IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), + IEEE80211_TX_CTRL_HW_80211_ENCAP = BIT(6), }; /* @@ -4660,6 +4661,26 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_tx_status_8023 - transmit status callback for 802.3 frame format + * + * Call this function for all transmitted data frames after their transmit + * completion. This callback should only be called for data frames which + * are are using driver's (or hardware's) offload capability of encap/decap + * 802.11 frames. + * + * This function may not be called in IRQ context. Calls to this function + * for a single hardware must be synchronized against each other and all + * calls in the same tx status family. + * + * @hw: the hardware the frame was transmitted by + * @vif: the interface for which the frame was transmitted + * @skb: the frame that was transmitted, owned by mac80211 after this call + */ +void ieee80211_tx_status_8023(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct sk_buff *skb); + /** * ieee80211_report_low_ack - report non-responding station * @@ -6480,5 +6501,16 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw, u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, struct ieee80211_tx_info *info, int len); +/** + * ieee80211_set_hw_80211_encap - enable hardware encapsulation offloading. + * + * This function is used to notify mac80211 that a vif can be passed raw 802.3 + * frames. The driver needs to then handle the 802.11 encapsulation inside the + * hardware or firmware. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @enable: indicate if the feature should be turned on or off + */ +bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable); #endif /* MAC80211_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ad15b3be8bb3..e3cf24cb4615 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -984,6 +984,8 @@ struct ieee80211_sub_if_data { } debugfs; #endif + bool hw_80211_encap; + /* must be last, dynamically sized area in this! 
*/ struct ieee80211_vif vif; }; @@ -1762,6 +1764,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, + struct net_device *dev); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, @@ -1948,6 +1952,11 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, enum nl80211_band band, u32 txdata_flags); +/* sta_out needs to be checked for ERR_PTR() before using */ +int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct sta_info **sta_out); + static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index af8b09214786..9b833e170c20 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1205,6 +1205,72 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_get_stats64 = ieee80211_get_stats64, }; +static const struct net_device_ops ieee80211_dataif_8023_ops = { + .ndo_open = ieee80211_open, + .ndo_stop = ieee80211_stop, + .ndo_uninit = ieee80211_uninit, + .ndo_start_xmit = ieee80211_subif_start_xmit_8023, + .ndo_set_rx_mode = ieee80211_set_multicast_list, + .ndo_set_mac_address = ieee80211_change_mac, + .ndo_select_queue = ieee80211_netdev_select_queue, + .ndo_get_stats64 = ieee80211_get_stats64, +}; + +static void __ieee80211_set_hw_80211_encap(struct ieee80211_sub_if_data *sdata, + bool enable) +{ + sdata->dev->netdev_ops = enable ? &ieee80211_dataif_8023_ops : + &ieee80211_dataif_ops; + sdata->hw_80211_encap = enable; +} + +bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *iter; + struct ieee80211_key *key; + + mutex_lock(&local->iflist_mtx); + list_for_each_entry(iter, &local->interfaces, list) { + struct ieee80211_sub_if_data *disable = NULL; + + if (vif->type == NL80211_IFTYPE_MONITOR) { + disable = iter; + __ieee80211_set_hw_80211_encap(iter, false); + } else if (iter->vif.type == NL80211_IFTYPE_MONITOR) { + disable = sdata; + enable = false; + } + if (disable) + sdata_dbg(disable, + "disable hw 80211 encap due to mon co-exist\n"); + } + mutex_unlock(&local->iflist_mtx); + + if (enable == sdata->hw_80211_encap) + return enable; + + if (!sdata->dev) + return false; + + if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG) && + (local->hw.wiphy->frag_threshold != (u32)-1)) + enable = false; + + mutex_lock(&sdata->local->key_mtx); + list_for_each_entry(key, &sdata->key_list, list) { + if (key->conf.cipher == WLAN_CIPHER_SUITE_TKIP) + enable = false; + } + mutex_unlock(&sdata->local->key_mtx); + + __ieee80211_set_hw_80211_encap(sdata, enable); + + return enable; +} +EXPORT_SYMBOL(ieee80211_set_hw_80211_encap); + static void ieee80211_if_free(struct net_device *dev) { free_percpu(dev->tstats); @@ -1404,6 +1470,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.idle = true; sdata->noack_map = 0; + sdata->hw_80211_encap = false; /* only monitor/p2p-device differ */ if (sdata->dev) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0f889b919b06..9a3a6b95fa27 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -177,6 +177,13 
@@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) } } + /* TKIP countermeasures don't work in encap offload mode */ + if (key->conf.cipher == WLAN_CIPHER_SUITE_TKIP && + sdata->hw_80211_encap) { + sdata_dbg(sdata, "TKIP is not allowed in hw 80211 encap mode\n"); + return -EINVAL; + } + ret = drv_set_key(key->local, SET_KEY, sdata, sta ? &sta->sta : NULL, &key->conf); @@ -203,6 +210,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); + /* cannot do software crypto with encapsulation offload */ + if (sdata->hw_80211_encap) + return -EINVAL; + out_unsupported: switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_WEP40: diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b720feaf9a74..0344b82a34f5 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1198,6 +1198,77 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_tx_rate_update); +void ieee80211_tx_status_8023(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct sk_buff *skb) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct sta_info *sta; + int retry_count; + int rates_idx; + bool acked; + + sdata = vif_to_sdata(vif); + + acked = info->flags & IEEE80211_TX_STAT_ACK; + rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); + + rcu_read_lock(); + + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) + goto counters_update; + + if (IS_ERR(sta)) + goto counters_update; + + if (!acked) + sta->status_stats.retry_failed++; + + if (rates_idx != -1) + sta->tx_stats.last_rate = info->status.rates[rates_idx]; + + sta->status_stats.retry_count += retry_count; + + if (ieee80211_hw_check(hw, REPORTS_TX_ACK_STATUS)) { + if (acked && vif->type == NL80211_IFTYPE_STATION) + ieee80211_sta_reset_conn_monitor(sdata); + + sta->status_stats.last_ack = jiffies; + if (info->flags & IEEE80211_TX_STAT_ACK) { + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; + + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) + sta->status_stats.last_tdls_pkt_time = jiffies; + } else { + ieee80211_lost_packet(sta, info); + } + } + +counters_update: + rcu_read_unlock(); + ieee80211_led_tx(local); + + if (!(info->flags & IEEE80211_TX_STAT_ACK) && + !(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) + goto skip_stats_update; + + I802_DEBUG_INC(local->dot11TransmittedFrameCount); + if (is_multicast_ether_addr(skb->data)) + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); + if (retry_count > 0) + I802_DEBUG_INC(local->dot11RetryCount); + if (retry_count > 1) + I802_DEBUG_INC(local->dot11MultipleRetryCount); + +skip_stats_update: + ieee80211_report_used_skb(local, skb, false); + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(ieee80211_tx_status_8023); + void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b696b9136f4c..b31b5078f656 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1250,7 +1250,8 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) return NULL; - if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) { + if (!(info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) && + unlikely(!ieee80211_is_data_present(hdr->frame_control))) { 
if ((!ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_bufferable_mmpdu(hdr->frame_control) || vif->type == NL80211_IFTYPE_STATION) && @@ -2351,9 +2352,9 @@ static inline bool ieee80211_is_tdls_setup(struct sk_buff *skb) skb->data[14] == WLAN_TDLS_SNAP_RFTYPE; } -static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct sta_info **sta_out) +int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct sta_info **sta_out) { struct sta_info *sta; @@ -3607,6 +3608,9 @@ begin: else info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) + goto encap_out; + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); @@ -3666,6 +3670,7 @@ begin: break; } +encap_out: IEEE80211_SKB_CB(skb)->control.vif = vif; if (local->airtime_flags & AIRTIME_USE_AQL) { @@ -4097,6 +4102,153 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, int led_len, + struct sta_info *sta, + bool txpending) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_control control = {}; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sta *pubsta = NULL; + unsigned long flags; + int q = info->hw_queue; + + if (ieee80211_queue_skb(local, sdata, sta, skb)) + return true; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + + if (local->queue_stop_reasons[q] || + (!txpending && !skb_queue_empty(&local->pending[q]))) { + if (txpending) + skb_queue_head(&local->pending[q], skb); + else + skb_queue_tail(&local->pending[q], skb); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + return false; + } + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + if (sta && sta->uploaded) + pubsta = &sta->sta; + + control.sta = pubsta; + + drv_tx(local, &control, skb); + + return true; +} + +static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, + struct net_device *dev, struct sta_info *sta, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ethhdr *ehdr = (struct ethhdr *)skb->data; + struct ieee80211_local *local = sdata->local; + bool authorized = false; + bool multicast; + unsigned char *ra = ehdr->h_dest; + + if (IS_ERR(sta) || (sta && !sta->uploaded)) + sta = NULL; + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER))) + ra = sdata->u.mgd.bssid; + + if (!is_valid_ether_addr(ra)) + goto out_free; + + multicast = is_multicast_ether_addr(ra); + + if (sta) + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + + if (!multicast && !authorized && + (ehdr->h_proto != sdata->control_port_protocol || + !ether_addr_equal(sdata->vif.addr, ehdr->h_source))) + goto out_free; + + if (multicast && sdata->vif.type == NL80211_IFTYPE_AP && + !atomic_read(&sdata->u.ap.num_mcast_sta)) + goto out_free; + + if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)) && + test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) + goto out_free; + + if (unlikely(!multicast && skb->sk && + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) + ieee80211_store_ack_skb(local, skb, &info->flags); + + memset(info, 0, sizeof(*info)); + + if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) { + if (sdata->control_port_no_encrypt) + info->flags |= 
IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + } + + if (multicast) + info->flags |= IEEE80211_TX_CTL_NO_ACK; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + ieee80211_tx_stats(dev, skb->len); + + if (sta) { + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + } + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + info->control.flags |= IEEE80211_TX_CTRL_HW_80211_ENCAP; + info->control.vif = &sdata->vif; + + ieee80211_tx_8023(sdata, skb, skb->len, sta, false); + + return; + +out_free: + kfree_skb(skb); +} + +netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct sta_info *sta; + + if (WARN_ON(!sdata->hw_80211_encap)) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (unlikely(skb->len < ETH_HLEN)) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + + rcu_read_lock(); + + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) + kfree_skb(skb); + else + ieee80211_8023_xmit(sdata, dev, sta, skb); + + rcu_read_unlock(); + + return NETDEV_TX_OK; +} + struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags) @@ -4175,6 +4327,16 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, } info->band = chanctx_conf->def.chan->band; result = ieee80211_tx(sdata, NULL, skb, true, 0); + } else if (info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) { + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { + dev_kfree_skb(skb); + return true; + } + + if (IS_ERR(sta) || (sta && !sta->uploaded)) + sta = NULL; + + result = ieee80211_tx_8023(sdata, skb, skb->len, sta, true); } else { struct sk_buff_head skbs; -- cgit v1.2.3 From 1ee7826ab68f7e9fa1a01533983acf6a6f62e297 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Sun, 15 Dec 2019 09:23:48 -0600 Subject: mac80211: Remove redundant assertion In wiphy_to_ieee80211_hw, the assertion to check if wiphy is NULL is repeated in wiphy_priv. The patch removes the duplicated BUG_ON check. 
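For context, wiphy_priv() in include/net/cfg80211.h already performs an equivalent check before dereferencing; its definition is approximately:

static inline void *wiphy_priv(struct wiphy *wiphy)
{
        BUG_ON(!wiphy);         /* same assertion as the one removed below */
        return &wiphy->priv;
}

so the extra BUG_ON in wiphy_to_ieee80211_hw() added no protection.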
Signed-off-by: Aditya Pakki Link: https://lore.kernel.org/r/20191215152348.20912-1-pakki001@umn.edu Signed-off-by: Johannes Berg --- net/mac80211/util.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 32a7a53833c0..780df3e9092e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -39,7 +39,6 @@ const void *const mac80211_wiphy_privid = &mac80211_wiphy_privid; struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy) { struct ieee80211_local *local; - BUG_ON(!wiphy); local = wiphy_priv(wiphy); return &local->hw; -- cgit v1.2.3 From e322c07f8371164eaa9e76d46d70b1b88ad5d153 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 27 Nov 2019 17:36:15 +0100 Subject: mac80211: debugfs: improve airtime_flags handler readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve airtime_flags debugfs handler readability reporting configured airtime flags in both numeric and human readable manner Signed-off-by: Lorenzo Bianconi Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/9df7e40b45e95bb0b320317831455beaed1ee3ee.1574872357.git.lorenzo@kernel.org [remove AQL since it's no longer there] Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index ad41d74530c6..54080290d6e2 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -150,6 +150,59 @@ static const struct file_operations aqm_ops = { .llseek = default_llseek, }; +static ssize_t airtime_flags_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[128] = {}, *pos, *end; + + pos = buf; + end = pos + sizeof(buf) - 1; + + if (local->airtime_flags & AIRTIME_USE_TX) + pos += scnprintf(pos, end - pos, "AIRTIME_TX\t(%lx)\n", + AIRTIME_USE_TX); + if (local->airtime_flags & AIRTIME_USE_RX) + pos += scnprintf(pos, end - pos, "AIRTIME_RX\t(%lx)\n", + AIRTIME_USE_RX); + + return simple_read_from_buffer(user_buf, count, ppos, buf, + strlen(buf)); +} + +static ssize_t airtime_flags_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[16]; + size_t len; + + if (count > sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') + buf[len - 1] = 0; + + if (kstrtou16(buf, 0, &local->airtime_flags)) + return -EINVAL; + + return count; +} + +static const struct file_operations airtime_flags_ops = { + .write = airtime_flags_write, + .read = airtime_flags_read, + .open = simple_open, + .llseek = default_llseek, +}; + static ssize_t aql_txq_limit_read(struct file *file, char __user *user_buf, size_t count, @@ -522,8 +575,7 @@ void debugfs_hw_add(struct ieee80211_local *local) if (local->ops->wake_tx_queue) DEBUGFS_ADD_MODE(aqm, 0600); - debugfs_create_u16("airtime_flags", 0600, - phyd, &local->airtime_flags); + DEBUGFS_ADD_MODE(airtime_flags, 0600); DEBUGFS_ADD(aql_txq_limit); debugfs_create_u32("aql_threshold", 0600, -- cgit v1.2.3 From 5c5e52d1bb962510fecdc1ebecdde89694d1b223 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Tue, 17 Dec 2019 15:19:18 +0100 Subject: nl80211: add handling for BSS color This 
patch adds the attributes, policy and parsing code to allow userland to send the info about the BSS coloring settings to the kernel. Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191217141921.8114-1-john@phrozen.org [johannes: remove the strict policy parsing, that was a misunderstanding] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ include/uapi/linux/nl80211.h | 26 ++++++++++++++++++++++++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 059524b87c4c..5c0b1b5ec77b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -259,6 +259,19 @@ struct ieee80211_he_obss_pd { u8 max_offset; }; +/** + * struct cfg80211_he_bss_color - AP settings for BSS coloring + * + * @color: the current color. + * @disabled: is the feature disabled. + * @partial: define the AID equation. + */ +struct cfg80211_he_bss_color { + u8 color; + bool disabled; + bool partial; +}; + /** * struct ieee80211_sta_ht_cap - STA's HT capabilities * @@ -990,6 +1003,7 @@ enum cfg80211_ap_settings_flags { * @twt_responder: Enable Target Wait Time * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings + * @he_bss_color: BSS Color settings */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1018,6 +1032,7 @@ struct cfg80211_ap_settings { bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; + struct cfg80211_he_bss_color he_bss_color; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5eab191607f8..809ef9165684 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2400,6 +2400,8 @@ enum nl80211_commands { * @NL80211_ATTR_VLAN_ID: VLAN ID (1..4094) for the station and VLAN group key * (u16). * + * @NL80211_ATTR_HE_BSS_COLOR: nested attribute for BSS Color Settings. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2864,6 +2866,8 @@ enum nl80211_attrs { NL80211_ATTR_VLAN_ID, + NL80211_ATTR_HE_BSS_COLOR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6587,5 +6591,27 @@ enum nl80211_obss_pd_attributes { NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1, }; +/** + * enum nl80211_bss_color_attributes - BSS Color attributes + * @__NL80211_HE_BSS_COLOR_ATTR_INVALID: Invalid + * + * @NL80211_HE_BSS_COLOR_ATTR_COLOR: the current BSS Color. + * @NL80211_HE_BSS_COLOR_ATTR_DISABLED: is BSS coloring disabled. + * @NL80211_HE_BSS_COLOR_ATTR_PARTIAL: the AID equation to be used.. + * + * @__NL80211_HE_BSS_COLOR_ATTR_LAST: Internal + * @NL80211_HE_BSS_COLOR_ATTR_MAX: highest BSS Color attribute. 
+ */ +enum nl80211_bss_color_attributes { + __NL80211_HE_BSS_COLOR_ATTR_INVALID, + + NL80211_HE_BSS_COLOR_ATTR_COLOR, + NL80211_HE_BSS_COLOR_ATTR_DISABLED, + NL80211_HE_BSS_COLOR_ATTR_PARTIAL, + + /* keep last */ + __NL80211_HE_BSS_COLOR_ATTR_LAST, + NL80211_HE_BSS_COLOR_ATTR_MAX = __NL80211_HE_BSS_COLOR_ATTR_LAST - 1, +}; #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fa3526592c51..00f24d4c623e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -321,6 +321,13 @@ he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { NLA_POLICY_RANGE(NLA_U8, 1, 20), }; +static const struct nla_policy +he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { + [NL80211_HE_BSS_COLOR_ATTR_COLOR] = NLA_POLICY_RANGE(NLA_U8, 1, 63), + [NL80211_HE_BSS_COLOR_ATTR_DISABLED] = { .type = NLA_FLAG }, + [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -625,6 +632,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), + [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), }; /* policy for the key attributes */ @@ -4511,6 +4519,30 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs, return 0; } +static int nl80211_parse_he_bss_color(struct nlattr *attrs, + struct cfg80211_he_bss_color *he_bss_color) +{ + struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, + he_bss_color_policy, NULL); + if (err) + return err; + + if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) + return -EINVAL; + + he_bss_color->color = + nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); + he_bss_color->disabled = + nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); + he_bss_color->partial = + nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); + + return 0; +} + static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, const u8 *rates) { @@ -4803,6 +4835,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { + err = nl80211_parse_he_bss_color( + info->attrs[NL80211_ATTR_HE_BSS_COLOR], + &params.he_bss_color); + if (err) + return err; + } + nl80211_calculate_ap_params(&params); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
-- cgit v1.2.3

From dd56e90230334752221473c06ff40cac44563a70 Mon Sep 17 00:00:00 2001
From: John Crispin
Date: Tue, 17 Dec 2019 15:19:19 +0100
Subject: mac80211: add handling for BSS color

It is now possible to propagate BSS color settings into the subsystem. Let's make mac80211 also handle them so that we can send them further down the stack into the drivers. We drop the old bss_color field and change iwlwifi to use the new he_bss_color struct.
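As a sketch of the driver-side consumption (not part of this patch; the callback body is hypothetical), a driver picks the new field up from bss_conf when BSS_CHANGED_HE_BSS_COLOR is signalled:

static void drv_example_bss_info_changed(struct ieee80211_hw *hw,
                                         struct ieee80211_vif *vif,
                                         struct ieee80211_bss_conf *info,
                                         u32 changed)
{
        if (changed & BSS_CHANGED_HE_BSS_COLOR) {
                /* he_bss_color replaces the old bss_conf->bss_color field */
                pr_debug("BSS color %u%s, partial %d\n",
                         info->he_bss_color.color,
                         info->he_bss_color.disabled ? " (disabled)" : "",
                         info->he_bss_color.partial);
                /* program the color / disable bit into the device here */
        }
}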
Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191217141921.8114-2-john@phrozen.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- include/net/cfg80211.h | 13 +++++++++++++ include/net/mac80211.h | 6 ++++-- net/mac80211/cfg.c | 5 ++++- net/mac80211/mlme.c | 9 ++++++++- 5 files changed, 30 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 32dc9d6f0fb6..01ef054e9176 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2016,7 +2016,7 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, struct iwl_he_sta_context_cmd sta_ctxt_cmd = { .sta_id = sta_id, .tid_limit = IWL_MAX_TID_COUNT, - .bss_color = vif->bss_conf.bss_color, + .bss_color = vif->bss_conf.he_bss_color.color, .htc_trig_based_pkt_ext = vif->bss_conf.htc_trig_based_pkt_ext, .frame_time_rts_th = cpu_to_le16(vif->bss_conf.frame_time_rts_th), diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5c0b1b5ec77b..fa027d0d031b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -272,6 +272,19 @@ struct cfg80211_he_bss_color { bool partial; }; +/** + * struct ieee80211_he_bss_color - AP settings for BSS coloring + * + * @color: the current color. + * @disabled: is the feature disabled. + * @partial: define the AID equation. + */ +struct ieee80211_he_bss_color { + u8 color; + bool disabled; + bool partial; +}; + /** * struct ieee80211_sta_ht_cap - STA's HT capabilities * diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 682fd2f4431b..6d4ea71523d2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -316,6 +316,7 @@ struct ieee80211_vif_chanctx_switch { * functionality changed for this BSS (AP mode). * @BSS_CHANGED_TWT: TWT status changed * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed. + * @BSS_CHANGED_HE_BSS_COLOR: BSS Color has changed * */ enum ieee80211_bss_change { @@ -348,6 +349,7 @@ enum ieee80211_bss_change { BSS_CHANGED_FTM_RESPONDER = 1<<26, BSS_CHANGED_TWT = 1<<27, BSS_CHANGED_HE_OBSS_PD = 1<<28, + BSS_CHANGED_HE_BSS_COLOR = 1<<29, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -494,7 +496,6 @@ struct ieee80211_ftm_responder_params { * This structure keeps information about a BSS (and an association * to that BSS) that can change during the lifetime of the BSS. * - * @bss_color: 6-bit value to mark inter-BSS frame, if BSS supports HE * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK * @uora_exists: is the UORA element advertised by AP @@ -604,10 +605,10 @@ struct ieee80211_ftm_responder_params { * in order to discover all the nontransmitted BSSIDs in the set. * @he_operation: HE operation information of the AP we are connected to * @he_obss_pd: OBSS Packet Detection parameters. 
+ * @he_bss_color: BSS coloring settings, if BSS supports HE */ struct ieee80211_bss_conf { const u8 *bssid; - u8 bss_color; u8 htc_trig_based_pkt_ext; bool multi_sta_back_32bit; bool uora_exists; @@ -667,6 +668,7 @@ struct ieee80211_bss_conf { u8 profile_periodicity; struct ieee80211_he_operation he_operation; struct ieee80211_he_obss_pd he_obss_pd; + struct cfg80211_he_bss_color he_bss_color; }; /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fb7f1f12109..a11bd1669c13 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -981,7 +981,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | BSS_CHANGED_TWT | - BSS_CHANGED_HE_OBSS_PD; + BSS_CHANGED_HE_OBSS_PD | + BSS_CHANGED_HE_BSS_COLOR; int err; int prev_beacon_int; @@ -1054,6 +1055,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.twt_responder = params->twt_responder; memcpy(&sdata->vif.bss_conf.he_obss_pd, &params->he_obss_pd, sizeof(struct ieee80211_he_obss_pd)); + memcpy(&sdata->vif.bss_conf.he_bss_color, &params->he_bss_color, + sizeof(struct ieee80211_he_bss_color)); sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5fa13176036f..6e4099009eab 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3368,9 +3368,16 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } if (bss_conf->he_support) { - bss_conf->bss_color = + bss_conf->he_bss_color.color = le32_get_bits(elems->he_operation->he_oper_params, IEEE80211_HE_OPERATION_BSS_COLOR_MASK); + bss_conf->he_bss_color.partial = + le32_get_bits(elems->he_operation->he_oper_params, + IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR); + bss_conf->he_bss_color.disabled = + le32_get_bits(elems->he_operation->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); + changed |= BSS_CHANGED_HE_BSS_COLOR; bss_conf->htc_trig_based_pkt_ext = le32_get_bits(elems->he_operation->he_oper_params,
-- cgit v1.2.3

From 5972fa15b923df6ccd02ae6e7095a6b08b5fca52 Mon Sep 17 00:00:00 2001
From: Markus Theil
Date: Wed, 18 Dec 2019 15:27:36 +0100
Subject: mac80211: fix tx status for no ack cases

Before this patch, frames which were successfully transmitted without requiring acks were accounted as lost frames.
Signed-off-by: Markus Theil
Link: https://lore.kernel.org/r/20191218142736.15843-1-markus.theil@tu-ilmenau.de
Signed-off-by: Johannes Berg
--- net/mac80211/status.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net')
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 0344b82a34f5..c9b90d38c54d 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -888,6 +888,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, int rates_idx; bool send_to_cooked; bool acked; + bool noack_success; struct ieee80211_bar *bar; int shift = 0; int tid = IEEE80211_NUM_TIDS; @@ -906,6 +907,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, clear_sta_flag(sta, WLAN_STA_SP); acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & + IEEE80211_TX_STAT_NOACK_TRANSMITTED); /* mesh Peer Service Period support */ if (ieee80211_vif_is_mesh(&sta->sdata->vif) && @@ -970,12 +973,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_handle_filtered_frame(local, sta, skb); return; } else { - if (!acked) + if (!acked && !noack_success) sta->status_stats.retry_failed++; sta->status_stats.retry_count += retry_count; if (ieee80211_is_data_present(fc)) { - if (!acked) + if (!acked && !noack_success) sta->status_stats.msdu_failed[tid]++; sta->status_stats.msdu_retries[tid] += @@ -1013,7 +1016,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { - if (info->flags & IEEE80211_TX_STAT_ACK) { + if (acked) { if (sta->status_stats.lost_packets) sta->status_stats.lost_packets = 0; @@ -1021,6 +1024,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) sta->status_stats.last_tdls_pkt_time = jiffies; + } else if (noack_success) { + /* nothing to do here, do not account as lost */ } else { ieee80211_lost_packet(sta, info); } @@ -1141,7 +1146,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sta = container_of(pubsta, struct sta_info, sta); - if (!acked) + if (!acked && !noack_success) sta->status_stats.retry_failed++; sta->status_stats.retry_count += retry_count; @@ -1156,6 +1161,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, sta->status_stats.last_tdls_pkt_time = jiffies; } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) { return; + } else if (noack_success) { + /* nothing to do here, do not account as lost */ } else { ieee80211_lost_packet(sta, info); }
-- cgit v1.2.3

From 30b2f0be23fb40e58d0ad2caf8702c2a44cda2e1 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen
Date: Mon, 13 Jan 2020 21:59:40 -0800
Subject: mac80211: add ieee80211_is_any_nullfunc()

commit 08a5bdde3812 ("mac80211: consider QoS Null frames for STA_NULLFUNC_ACKED") fixed a bug where we failed to take into account that a nullfunc frame can be either non-QoS or QoS. It turns out there is at least one more bug in ieee80211_sta_tx_notify(), introduced in commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing"), where we forgot to check for the QoS variant and so assumed the QoS nullfunc frame never went out. Fix this by adding a helper ieee80211_is_any_nullfunc() which consolidates the check for non-QoS and QoS nullfunc frames. Replace existing compound conditionals and add a couple more missing checks for the QoS variant.
Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200114055940.18502-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++++++ net/mac80211/mlme.c | 2 +- net/mac80211/rx.c | 8 +++----- net/mac80211/status.c | 5 ++--- net/mac80211/tx.c | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e172d0c7bf74..1c4409b4c012 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -619,6 +619,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6e4099009eab..cb6fd0a09e07 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2460,7 +2460,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, if (!ieee80211_is_data(hdr->frame_control)) return; - if (ieee80211_is_nullfunc(hdr->frame_control) && + if (ieee80211_is_any_nullfunc(hdr->frame_control) && sdata->u.mgd.probe_send_count > 0) { if (ack) ieee80211_sta_reset_conn_monitor(sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0e05ff037672..619c223f1cde 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1450,8 +1450,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || - ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control) || + ieee80211_is_any_nullfunc(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; @@ -1838,8 +1837,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Drop (qos-)data::nullfunc frames silently, since they * are used only to control station power saving mode. */ - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + if (ieee80211_is_any_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); /* @@ -2319,7 +2317,7 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) /* Drop unencrypted frames if key is set. 
*/ if (unlikely(!ieee80211_has_protected(fc) && - !ieee80211_is_nullfunc(fc) && + !ieee80211_is_any_nullfunc(fc) && ieee80211_is_data(fc) && rx->key)) return -EACCES; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index c9b90d38c54d..22512805eafb 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -643,8 +643,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) + if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, info->status.ack_signal, @@ -1061,7 +1060,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + if (ieee80211_is_any_nullfunc(fc) && ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c56d801e708f..4296d9d71311 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -297,7 +297,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && - !ieee80211_is_nullfunc(hdr->frame_control)) + !ieee80211_is_any_nullfunc(hdr->frame_control)) /* * When software scanning only nullfunc frames (to notify * the sleep state to the AP) and probe requests (for the -- cgit v1.2.3 From 151129df2f4ac29e55be6d3a7be91d0979f71a55 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 27 Jan 2020 23:36:09 +0100 Subject: Bluetooth: SMP: Fix SALT value in some comments Salts are 16 bytes long. Remove some extra and erroneous '0' in the human readable format used in comments. Signed-off-by: Christophe JAILLET Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 204f14f8b507..83449a88a182 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1145,7 +1145,7 @@ static void sc_generate_link_key(struct smp_chan *smp) return; if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7031 */ + /* SALT = 0x000000000000000000000000746D7031 */ const u8 salt[16] = { 0x31, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, smp->tk, salt, smp->link_key)) { @@ -1203,7 +1203,7 @@ static void sc_generate_ltk(struct smp_chan *smp) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7032 */ + /* SALT = 0x000000000000000000000000746D7032 */ const u8 salt[16] = { 0x32, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, key->val, salt, smp->tk)) -- cgit v1.2.3 From 6c08fc896b60893c5d673764b0668015d76df462 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Tue, 28 Jan 2020 10:54:14 -0800 Subject: Bluetooth: Fix refcount use-after-free issue There is no lock preventing both l2cap_sock_release() and chan->ops->close() from running at the same time. 
If we consider Thread A running l2cap_chan_timeout() and Thread B running l2cap_sock_release(), expected behavior is: A::l2cap_chan_timeout()->l2cap_chan_close()->l2cap_sock_teardown_cb() A::l2cap_chan_timeout()->l2cap_sock_close_cb()->l2cap_sock_kill() B::l2cap_sock_release()->sock_orphan() B::l2cap_sock_release()->l2cap_sock_kill() where, sock_orphan() clears "sk->sk_socket" and l2cap_sock_teardown_cb() marks socket as SOCK_ZAPPED. In l2cap_sock_kill(), there is an "if-statement" that checks if both sock_orphan() and sock_teardown() has been run i.e. sk->sk_socket is NULL and socket is marked as SOCK_ZAPPED. Socket is killed if the condition is satisfied. In the race condition, following occurs: A::l2cap_chan_timeout()->l2cap_chan_close()->l2cap_sock_teardown_cb() B::l2cap_sock_release()->sock_orphan() B::l2cap_sock_release()->l2cap_sock_kill() A::l2cap_chan_timeout()->l2cap_sock_close_cb()->l2cap_sock_kill() In this scenario, "if-statement" is true in both B::l2cap_sock_kill() and A::l2cap_sock_kill() and we hit "refcount: underflow; use-after-free" bug. Similar condition occurs at other places where teardown/sock_kill is happening: l2cap_disconnect_rsp()->l2cap_chan_del()->l2cap_sock_teardown_cb() l2cap_disconnect_rsp()->l2cap_sock_close_cb()->l2cap_sock_kill() l2cap_conn_del()->l2cap_chan_del()->l2cap_sock_teardown_cb() l2cap_conn_del()->l2cap_sock_close_cb()->l2cap_sock_kill() l2cap_disconnect_req()->l2cap_chan_del()->l2cap_sock_teardown_cb() l2cap_disconnect_req()->l2cap_sock_close_cb()->l2cap_sock_kill() l2cap_sock_cleanup_listen()->l2cap_chan_close()->l2cap_sock_teardown_cb() l2cap_sock_cleanup_listen()->l2cap_sock_kill() Protect teardown/sock_kill and orphan/sock_kill by adding hold_lock on l2cap channel to ensure that the socket is killed only after marked as zapped and orphan. Signed-off-by: Manish Mandlik Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 26 +++++++++++++++----------- net/bluetooth/l2cap_sock.c | 16 +++++++++++++--- 2 files changed, 28 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 195459a1e53e..dd2021270b8a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -419,6 +419,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); mutex_lock(&conn->chan_lock); + /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling + * this work. No need to call l2cap_chan_hold(chan) here again. 
+ */ l2cap_chan_lock(chan); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) @@ -431,12 +434,12 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_close(chan, reason); - l2cap_chan_unlock(chan); - chan->ops->close(chan); - mutex_unlock(&conn->chan_lock); + l2cap_chan_unlock(chan); l2cap_chan_put(chan); + + mutex_unlock(&conn->chan_lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -1737,9 +1740,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_del(chan, err); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); } @@ -4405,6 +4408,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); rsp.dcid = cpu_to_le16(chan->scid); @@ -4413,12 +4417,11 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_hold(chan); l2cap_chan_del(chan, ECONNRESET); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -4450,20 +4453,21 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); + l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); l2cap_chan_del(chan, 0); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7be8b59b3c2..ab65304f3f63 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1042,7 +1042,7 @@ done: } /* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. + * Must be called on unlocked socket, with l2cap channel lock. */ static void l2cap_sock_kill(struct sock *sk) { @@ -1203,8 +1203,15 @@ static int l2cap_sock_release(struct socket *sock) err = l2cap_sock_shutdown(sock, 2); + l2cap_chan_hold(l2cap_pi(sk)->chan); + l2cap_chan_lock(l2cap_pi(sk)->chan); + sock_orphan(sk); l2cap_sock_kill(sk); + + l2cap_chan_unlock(l2cap_pi(sk)->chan); + l2cap_chan_put(l2cap_pi(sk)->chan); + return err; } @@ -1222,12 +1229,15 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); + l2cap_chan_hold(chan); l2cap_chan_lock(chan); + __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_chan_unlock(chan); - l2cap_sock_kill(sk); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } } -- cgit v1.2.3 From fe66483156050f4eb63c4a1988f3b439e6c9ff2a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 29 Jan 2020 10:10:41 -0800 Subject: Bluetooth: optimize barrier usage for Rmw atomics Use smp_mb__before_atomic() instead of smp_mb() and avoid the unnecessary barrier for non LL/SC architectures, such as x86. Signed-off-by: Davidlohr Bueso Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bef84b95e2c4..3b4fa27a44e6 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1279,7 +1279,7 @@ static int hidp_session_thread(void *arg) add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). 
See * sock_poll_wait() for more information why this is needed. */ - smp_mb(); + smp_mb__before_atomic(); /* notify synchronous startup that we're ready */ atomic_inc(&session->state); -- cgit v1.2.3 From 2a154903cec20fb64ff4d7d617ca53c16f8fd53a Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Wed, 5 Feb 2020 10:31:59 +0800 Subject: Bluetooth: prefetch channel before killing sock Prefetch channel before killing sock in order to fix UAF like BUG: KASAN: use-after-free in l2cap_sock_release+0x24c/0x290 net/bluetooth/l2cap_sock.c:1212 Read of size 8 at addr ffff8880944904a0 by task syz-fuzzer/9751 Reported-by: syzbot+c3c5bdea7863886115dc@syzkaller.appspotmail.com Fixes: 6c08fc896b60 ("Bluetooth: Fix refcount use-after-free issue") Cc: Manish Mandlik Signed-off-by: Hillf Danton Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_sock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ab65304f3f63..390a9afab647 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1193,6 +1193,7 @@ static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; + struct l2cap_chan *chan; BT_DBG("sock %p, sk %p", sock, sk); @@ -1202,15 +1203,16 @@ static int l2cap_sock_release(struct socket *sock) bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, 2); + chan = l2cap_pi(sk)->chan; - l2cap_chan_hold(l2cap_pi(sk)->chan); - l2cap_chan_lock(l2cap_pi(sk)->chan); + l2cap_chan_hold(chan); + l2cap_chan_lock(chan); sock_orphan(sk); l2cap_sock_kill(sk); - l2cap_chan_unlock(l2cap_pi(sk)->chan); - l2cap_chan_put(l2cap_pi(sk)->chan); + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } -- cgit v1.2.3 From 2ade42d88fdbb0af37dfdcaae8e5246f7b5b6e9b Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Wed, 5 Feb 2020 11:23:27 +0800 Subject: Bluetooth: remove __get_channel/dir and __dir These 3 macros are never used from first git commit Linux-2.6.12-rc2. let's remove them. Signed-off-by: Alex Shi Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 3a9e9d9670be..dcecce087b24 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -73,8 +73,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) -#define __get_channel(b) ((b & 0xf8) >> 3) -#define __get_dir(b) ((b & 0x04) >> 2) #define __get_type(b) ((b & 0xef)) #define __test_ea(b) ((b & 0x01)) @@ -87,7 +85,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); #define __ctrl(type, pf) (((type & 0xef) | (pf << 4))) #define __dlci(dir, chn) (((chn & 0x1f) << 1) | dir) #define __srv_channel(dlci) (dlci >> 1) -#define __dir(dlci) (dlci & 0x01) #define __len8(len) (((len) << 1) | 1) #define __len16(len) ((len) << 1) -- cgit v1.2.3 From 3c706b973b51ed45e4c0f40642cfb650dfc804d7 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 3 Feb 2020 13:28:12 +0100 Subject: mac80211: fix 11w when using encapsulation offloading The 802.11 encapsulation returned early when setting up the keys in case offloading was enabled. This causes ieee802.11w to not work anymore. Fix this by moving the check for offloading into the switch/case construct and allowing CCMP/GCMP keys. 
With this patch applied ieee80211w works again when enabling offloading. Fixes: 50ff477a8639 ("mac80211: add 802.11 encapsulation offloading support") Reported-by: Maharaja Kennadyrajan Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200203122812.18993-1-john@phrozen.org Signed-off-by: Johannes Berg --- net/mac80211/key.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 9a3a6b95fa27..54934eff4ac1 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -210,10 +210,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); - /* cannot do software crypto with encapsulation offload */ - if (sdata->hw_80211_encap) - return -EINVAL; - out_unsupported: switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_WEP40: @@ -221,12 +217,20 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* We cannot do software crypto of data frames with + * encapsulation offload enabled. However for 802.11w to + * function properly we need cmac/gmac keys. + */ + if (sdata->hw_80211_encap) + return -EINVAL; + /* Fall through */ + case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ if (ret == 1) return 0; -- cgit v1.2.3 From 1e61d82cca170465902003bfe445f0461e28208b Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 21 Jan 2020 10:12:13 +0200 Subject: cfg80211: add no HE indication to the channel flag The regulatory domain might forbid HE operation. Certain regulatory domains may restrict it for specific channels whereas others may do it for the whole regulatory domain. Add an option to indicate it in the channel flag. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200121081213.733757-1-luca@coelho.fi Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 3 +++ net/wireless/reg.c | 2 ++ 4 files changed, 12 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa027d0d031b..40f2a3a30e3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -95,6 +95,7 @@ struct wiphy; * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. + * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. * */ enum ieee80211_channel_flags { @@ -111,6 +112,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, + IEEE80211_CHAN_NO_HE = 1<<13, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 809ef9165684..d996bac97e9d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3587,6 +3587,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations. * This is a nested attribute that contains the wmm limitation per AC. 
* (see &enum nl80211_wmm_rule) + * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel + * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -3616,6 +3618,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, NL80211_FREQUENCY_ATTR_WMM, + NL80211_FREQUENCY_ATTR_NO_HE, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3813,6 +3816,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed + * @NL80211_RRF_NO_HE: HE operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -3830,6 +3834,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, NL80211_RRF_NO_160MHZ = 1<<16, + NL80211_RRF_NO_HE = 1<<17, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 00f24d4c623e..d8cdbf07aeec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -972,6 +972,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HE) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HE)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 446c76d44e65..ea7bc5652a41 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1569,6 +1569,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_80MHZ; if (rd_flags & NL80211_RRF_NO_160MHZ) channel_flags |= IEEE80211_CHAN_NO_160MHZ; + if (rd_flags & NL80211_RRF_NO_HE) + channel_flags |= IEEE80211_CHAN_NO_HE; return channel_flags; } -- cgit v1.2.3 From d6039a3416f7af37c04f22c411f120ad46f51663 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 27 Jan 2020 02:00:32 +0530 Subject: cfg80211: Enhance the AKM advertizement to support per interface. Commit ab4dfa20534e ("cfg80211: Allow drivers to advertise supported AKM suites") introduces the support to advertize supported AKMs to userspace. This needs an enhancement to advertize the AKM support per interface type, specifically for the cfg80211-based drivers that implement SME and use different mechanisms to support the AKM's for each interface type (e.g., the support for SAE, OWE AKM's take different paths for such drivers on STA/AP mode). This commit aims the same and enhances the earlier mechanism of advertizing the AKMs per wiphy. Add new nl80211 attributes and data structure to provide supported AKMs per interface type to userspace. the AKMs advertized in akm_suites are default capabilities if not advertized for a specific interface type in iftype_akm_suites. 
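To illustrate the intended driver-side usage (a sketch, not taken from any in-tree driver; the AKM selections and array names are arbitrary), a driver points the wiphy at per-iftype AKM lists during setup:

static const u32 drv_example_sta_akms[] = {
        WLAN_AKM_SUITE_PSK,
        WLAN_AKM_SUITE_SAE,
};

static const u32 drv_example_ap_akms[] = {
        WLAN_AKM_SUITE_PSK,
};

static const struct wiphy_iftype_akm_suites drv_example_iftype_akms[] = {
        {
                .iftypes_mask = BIT(NL80211_IFTYPE_STATION) |
                                BIT(NL80211_IFTYPE_P2P_CLIENT),
                .akm_suites = drv_example_sta_akms,
                .n_akm_suites = ARRAY_SIZE(drv_example_sta_akms),
        },
        {
                .iftypes_mask = BIT(NL80211_IFTYPE_AP),
                .akm_suites = drv_example_ap_akms,
                .n_akm_suites = ARRAY_SIZE(drv_example_ap_akms),
        },
};

/* in the driver's wiphy initialisation: */
wiphy->iftype_akm_suites = drv_example_iftype_akms;
wiphy->num_iftype_akm_suites = ARRAY_SIZE(drv_example_iftype_akms);

Each interface type may appear in at most one entry; types not listed fall back to the wiphy-wide akm_suites array.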
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200126203032.21934-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 33 +++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 40f2a3a30e3d..c3a9b375d3ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4400,6 +4400,21 @@ struct cfg80211_pmsr_capabilities { } ftm; }; +/** + * struct wiphy_iftype_akm_suites - This structure encapsulates supported akm + * suites for interface types defined in @iftypes_mask. Each type in the + * @iftypes_mask must be unique across all instances of iftype_akm_suites. + * + * @iftypes_mask: bitmask of interfaces types + * @akm_suites: points to an array of supported akm suites + * @n_akm_suites: number of supported AKM suites + */ +struct wiphy_iftype_akm_suites { + u16 iftypes_mask; + const u32 *akm_suites; + int n_akm_suites; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -4412,8 +4427,16 @@ struct cfg80211_pmsr_capabilities { * @signal_type: signal type reported in &struct cfg80211_bss. * @cipher_suites: supported cipher suites * @n_cipher_suites: number of supported cipher suites - * @akm_suites: supported AKM suites + * @akm_suites: supported AKM suites. These are the default AKMs supported if + * the supported AKMs not advertized for a specific interface type in + * iftype_akm_suites. * @n_akm_suites: number of supported AKM suites + * @iftype_akm_suites: array of supported akm suites info per interface type. + * Note that the bits in @iftypes_mask inside this structure cannot + * overlap (i.e. only one occurrence of each type is allowed across all + * instances of iftype_akm_suites). + * @num_iftype_akm_suites: number of interface types for which supported akm + * suites are specified separately. * @retry_short: Retry limit for short frames (dot11ShortRetryLimit) * @retry_long: Retry limit for long frames (dot11LongRetryLimit) * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); @@ -4620,6 +4643,9 @@ struct wiphy { int n_akm_suites; const u32 *akm_suites; + const struct wiphy_iftype_akm_suites *iftype_akm_suites; + unsigned int num_iftype_akm_suites; + u8 retry_short; u8 retry_long; u32 frag_threshold; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d996bac97e9d..350ab86fe20e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2402,6 +2402,13 @@ enum nl80211_commands { * * @NL80211_ATTR_HE_BSS_COLOR: nested attribute for BSS Color Settings. * + * @NL80211_ATTR_IFTYPE_AKM_SUITES: nested array attribute, with each entry + * using attributes from &enum nl80211_iftype_akm_attributes. This + * attribute is sent in a response to %NL80211_CMD_GET_WIPHY indicating + * supported AKM suites capability per interface. AKMs advertised in + * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not + * advertised for a specific interface type. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2868,6 +2875,8 @@ enum nl80211_attrs { NL80211_ATTR_HE_BSS_COLOR, + NL80211_ATTR_IFTYPE_AKM_SUITES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6619,4 +6628,28 @@ enum nl80211_bss_color_attributes { NL80211_HE_BSS_COLOR_ATTR_MAX = __NL80211_HE_BSS_COLOR_ATTR_LAST - 1, }; +/** + * enum nl80211_iftype_akm_attributes - interface type AKM attributes + * @__NL80211_IFTYPE_AKM_ATTR_INVALID: Invalid + * + * @NL80211_IFTYPE_AKM_ATTR_IFTYPES: nested attribute containing a flag + * attribute for each interface type that supports AKM suites specified in + * %NL80211_IFTYPE_AKM_ATTR_SUITES + * @NL80211_IFTYPE_AKM_ATTR_SUITES: an array of u32. Used to indicate supported + * AKM suites for the specified interface types. + * + * @__NL80211_IFTYPE_AKM_ATTR_LAST: Internal + * @NL80211_IFTYPE_AKM_ATTR_MAX: highest interface type AKM attribute. + */ +enum nl80211_iftype_akm_attributes { + __NL80211_IFTYPE_AKM_ATTR_INVALID, + + NL80211_IFTYPE_AKM_ATTR_IFTYPES, + NL80211_IFTYPE_AKM_ATTR_SUITES, + + /* keep last */ + __NL80211_IFTYPE_AKM_ATTR_LAST, + NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d8cdbf07aeec..3ad937d0a256 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1896,6 +1896,46 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, return 0; } +static int +nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + int i; + struct nlattr *nested, *nested_akms; + const struct wiphy_iftype_akm_suites *iftype_akms; + + if (!rdev->wiphy.num_iftype_akm_suites || + !rdev->wiphy.iftype_akm_suites) + return 0; + + nested = nla_nest_start(msg, NL80211_ATTR_IFTYPE_AKM_SUITES); + if (!nested) + return -ENOBUFS; + + for (i = 0; i < rdev->wiphy.num_iftype_akm_suites; i++) { + nested_akms = nla_nest_start(msg, i + 1); + if (!nested_akms) + return -ENOBUFS; + + iftype_akms = &rdev->wiphy.iftype_akm_suites[i]; + + if (nl80211_put_iftypes(msg, NL80211_IFTYPE_AKM_ATTR_IFTYPES, + iftype_akms->iftypes_mask)) + return -ENOBUFS; + + if (nla_put(msg, NL80211_IFTYPE_AKM_ATTR_SUITES, + sizeof(u32) * iftype_akms->n_akm_suites, + iftype_akms->akm_suites)) { + return -ENOBUFS; + } + nla_nest_end(msg, nested_akms); + } + + nla_nest_end(msg, nested); + + return 0; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2454,6 +2494,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.akm_suites)) goto nla_put_failure; + if (nl80211_put_iftype_akm_suites(rdev, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3 From 75e296e9b22aef6fa467523ace87ef623dac1fad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:39 +0200 Subject: mac80211: simplify and improve HT/VHT/HE disable code Check early on that a device has support for QoS (at least 4 queues) when it supports HT/VHT/HE, so we don't have to check this while connecting. This lets us clean up the code there: move some of it into channel preparation to clean up a bit more, and then change the logic to only check the "wmm_used" flag. Additionally, disable HE consistently when VHT is disabled. 
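The resulting policy can be summarized in this sketch (it only mirrors the
hunks below, nothing new is added here):

    /* ieee80211_prep_channel(): disable whatever the local band lacks */
    if (!sband->ht_cap.ht_supported)
            ifmgd->flags |= IEEE80211_STA_DISABLE_HT |
                            IEEE80211_STA_DISABLE_VHT |
                            IEEE80211_STA_DISABLE_HE;
    if (!sband->vht_cap.vht_supported)
            ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
    if (!ieee80211_get_he_sta_cap(sband))
            ifmgd->flags |= IEEE80211_STA_DISABLE_HE;

    /* ieee80211_mgd_assoc(): an AP without WMM gets none of HT/VHT/HE */
    if (!bss->wmm_used)
            ifmgd->flags |= IEEE80211_STA_DISABLE_HT |
                            IEEE80211_STA_DISABLE_VHT |
                            IEEE80211_STA_DISABLE_HE;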
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-3-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/main.c | 5 +++++ net/mac80211/mlme.c | 62 ++++++++++++++++++++++++++++------------------------- 2 files changed, 38 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c2b5ba3ac09..34728cf94172 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -981,6 +981,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!supp_he) supp_he = !!ieee80211_get_he_sta_cap(sband); + /* HT, VHT, HE require QoS, thus >= 4 queues */ + if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS && + (supp_ht || supp_vht || supp_he))) + return -EINVAL; + if (!sband->ht_cap.ht_supported) continue; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cb6fd0a09e07..f076e73314a6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -164,7 +164,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, chandef->center_freq1 = channel->center_freq; if (!ht_oper || !sta_ht_cap.ht_supported) { - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE; goto out; } @@ -185,7 +187,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", channel->center_freq, ht_cfreq, ht_oper->primary_chan, channel->band); - ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE; goto out; } @@ -301,7 +305,8 @@ out: IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | - IEEE80211_STA_DISABLE_VHT; + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE; break; } @@ -393,6 +398,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE | IEEE80211_STA_DISABLE_40MHZ | IEEE80211_STA_DISABLE_80P80MHZ | IEEE80211_STA_DISABLE_160MHZ)) || @@ -4760,10 +4766,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_DISABLE_80P80MHZ | IEEE80211_STA_DISABLE_160MHZ); + /* disable HT/VHT/HE if we don't support them */ + if (!sband->ht_cap.ht_supported) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + } + + if (!sband->vht_cap.vht_supported) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + + if (!ieee80211_get_he_sta_cap(sband)) + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + rcu_read_lock(); - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && - sband->ht_cap.ht_supported) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { const u8 *ht_oper_ie, *ht_cap_ie; ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); @@ -4780,8 +4798,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } } - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && - sband->vht_cap.vht_supported) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { const u8 *vht_oper_ie, *vht_cap; vht_oper_ie = ieee80211_bss_get_ie(cbss, @@ -4791,9 +4808,10 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (vht_oper && !ht_oper) { vht_oper = NULL; sdata_info(sdata, 
- "AP advertised VHT without HT, disabling both\n"); + "AP advertised VHT without HT, disabling HT/VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } vht_cap = ieee80211_bss_get_ie(cbss, WLAN_EID_VHT_CAPABILITY); @@ -4803,9 +4821,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } } - if (!ieee80211_get_he_sta_cap(sband)) - ifmgd->flags |= IEEE80211_STA_DISABLE_HE; - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) { const struct cfg80211_bss_ies *ies; const u8 *he_oper_ie; @@ -5304,27 +5319,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } } - /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; - if (!sband->ht_cap.ht_supported || - local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used || - ifmgd->flags & IEEE80211_STA_DISABLE_WMM) { - ifmgd->flags |= IEEE80211_STA_DISABLE_HT; - if (!bss->wmm_used && - !(ifmgd->flags & IEEE80211_STA_DISABLE_WMM)) - netdev_info(sdata->dev, - "disabling HT as WMM/QoS is not supported by the AP\n"); - } - /* disable VHT if we don't support it or the AP doesn't use WMM */ - if (!sband->vht_cap.vht_supported || - local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used || - ifmgd->flags & IEEE80211_STA_DISABLE_WMM) { + /* also disable HT/VHT/HE if the AP doesn't use WMM */ + if (!bss->wmm_used) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - if (!bss->wmm_used && - !(ifmgd->flags & IEEE80211_STA_DISABLE_WMM)) - netdev_info(sdata->dev, - "disabling VHT as WMM/QoS is not supported by the AP\n"); + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + netdev_info(sdata->dev, + "disabling HT/VHT/HE as WMM/QoS is not supported by the AP\n"); } memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); @@ -5456,6 +5459,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (req->flags & ASSOC_REQ_DISABLE_HT) { ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } if (req->flags & ASSOC_REQ_DISABLE_VHT) -- cgit v1.2.3 From e4d005b80deeb053526ca089510bf2e20473ef62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:40 +0200 Subject: mac80211: refactor extended element parsing This code was really ugly, refactor it a bit to make it more readable. While at it, use sizeof() and fix the UORA element length check bug. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-4-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/util.c | 75 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 780df3e9092e..72039c8dbc38 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -890,6 +890,51 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_queue_delayed_work); +static void ieee80211_parse_extension_element(u32 *crc, + const struct element *elem, + struct ieee802_11_elems *elems) +{ + const void *data = elem->data + 1; + u8 len = elem->datalen - 1; + + switch (elem->data[0]) { + case WLAN_EID_EXT_HE_MU_EDCA: + if (len == sizeof(*elems->mu_edca_param_set)) { + elems->mu_edca_param_set = data; + if (crc) + *crc = crc32_be(*crc, (void *)elem, + elem->datalen + 2); + } + break; + case WLAN_EID_EXT_HE_CAPABILITY: + elems->he_cap = data; + elems->he_cap_len = len; + break; + case WLAN_EID_EXT_HE_OPERATION: + if (len >= sizeof(*elems->he_operation) && + len == ieee80211_he_oper_size(data) - 1) + elems->he_operation = data; + break; + case WLAN_EID_EXT_UORA: + if (len == 1) + elems->uora_element = data; + break; + case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: + if (len == 3) + elems->max_channel_switch_time = data; + break; + case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: + if (len == sizeof(*elems->mbssid_config_ie)) + elems->mbssid_config_ie = data; + break; + case WLAN_EID_EXT_HE_SPR: + if (len >= sizeof(*elems->he_spr) && + len >= ieee80211_he_spr_size(data)) + elems->he_spr = data; + break; + } +} + static u32 _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, @@ -1220,33 +1265,9 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->max_idle_period_ie = (void *)pos; break; case WLAN_EID_EXTENSION: - if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA && - elen >= (sizeof(*elems->mu_edca_param_set) + 1)) { - elems->mu_edca_param_set = (void *)&pos[1]; - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - } else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) { - elems->he_cap = (void *)&pos[1]; - elems->he_cap_len = elen - 1; - } else if (pos[0] == WLAN_EID_EXT_HE_OPERATION && - elen >= sizeof(*elems->he_operation) && - elen >= ieee80211_he_oper_size(&pos[1])) { - elems->he_operation = (void *)&pos[1]; - } else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) { - elems->uora_element = (void *)&pos[1]; - } else if (pos[0] == - WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME && - elen == 4) { - elems->max_channel_switch_time = pos + 1; - } else if (pos[0] == - WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION && - elen == 3) { - elems->mbssid_config_ie = (void *)&pos[1]; - } else if (pos[0] == WLAN_EID_EXT_HE_SPR && - elen >= sizeof(*elems->he_spr) && - elen >= ieee80211_he_spr_size(&pos[1])) { - elems->he_spr = (void *)&pos[1]; - } + ieee80211_parse_extension_element(calc_crc ? + &crc : NULL, + elem, elems); break; default: break; -- cgit v1.2.3 From b5db1acab19b99da5b30042133734b01f65b5900 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Fri, 31 Jan 2020 13:12:44 +0200 Subject: mac80211: check whether HE connection is allowed by the reg domain The wireless device might be capable to connect HE as well as the AP. However, the regulatory domain might forbid it. 
Check whether the regulatory domain allows HE connection when considering if HE IE should be added. Also, add it when setting our peer capability. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-8-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f076e73314a6..152577cc2213 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -313,6 +313,10 @@ out: ret |= ieee80211_chandef_downgrade(chandef); } + if (!cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + IEEE80211_CHAN_NO_HE)) + ret |= IEEE80211_STA_DISABLE_HE; + if (chandef->width != vht_chandef.width && !tracking) sdata_info(sdata, "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); @@ -622,10 +626,21 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, { u8 *pos; const struct ieee80211_sta_he_cap *he_cap = NULL; + struct ieee80211_chanctx_conf *chanctx_conf; u8 he_cap_size; + bool reg_cap = false; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!WARN_ON_ONCE(!chanctx_conf)) + reg_cap = cfg80211_chandef_usable(sdata->wdev.wiphy, + &chanctx_conf->def, + IEEE80211_CHAN_NO_HE); + + rcu_read_unlock(); he_cap = ieee80211_get_he_sta_cap(sband); - if (!he_cap) + if (!he_cap || !reg_cap) return; /* -- cgit v1.2.3 From 07b83d2ecd2f812c32d1f852f853375f50e1ccf2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:46 +0200 Subject: mac80211: allow changing TX-related netdev features Set ndev->hw_features as well as ndev->features to allow changing the TX-related features with ethtool. We cannot (yet) change RX-related features since that requires telling the driver about it and we have no API for that yet. 
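For example (interface name and feature chosen for illustration; assumes the
driver advertised NETIF_F_SG in hw->netdev_features), the TX features can now
be inspected and toggled from user space:

    # show the current feature state
    ethtool -k wlan0
    # disable scatter-gather, one of the TX-related features
    # whitelisted in the diff below
    ethtool -K wlan0 sg off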
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-10-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +++++++ net/mac80211/iface.c | 4 +++- net/mac80211/main.c | 6 +----- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e3cf24cb4615..b89543269f51 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1729,6 +1729,13 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); /* interface handling */ +#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ + NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE) +#define MAC80211_SUPPORTED_FEATURES_RX (NETIF_F_RXCSUM) +#define MAC80211_SUPPORTED_FEATURES (MAC80211_SUPPORTED_FEATURES_TX | \ + MAC80211_SUPPORTED_FEATURES_RX) + int ieee80211_iface_init(void); void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9b833e170c20..99d913a6e651 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #include #include @@ -1938,6 +1938,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->u.mgd.use_4addr = params->use_4addr; ndev->features |= local->hw.netdev_features; + ndev->hw_features |= ndev->features & + MAC80211_SUPPORTED_FEATURES_TX; netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 34728cf94172..287dd0588476 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -872,7 +872,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) enum nl80211_band band; int channels, max_bitrates; bool supp_ht, supp_vht, supp_he; - netdev_features_t feature_whitelist; struct cfg80211_chan_def dflt_chandef = {}; if (ieee80211_hw_check(hw, QUEUE_CONTROL) && @@ -931,10 +930,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } /* Only HW csum features are currently compatible with mac80211 */ - feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_GSO_SOFTWARE | NETIF_F_RXCSUM; - if (WARN_ON(hw->netdev_features & ~feature_whitelist)) + if (WARN_ON(hw->netdev_features & ~MAC80211_SUPPORTED_FEATURES)) return -EINVAL; if (hw->max_report_rates == 0) -- cgit v1.2.3 From 4a65cc2437ce4a643c24b357d849e3ff773efed1 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 31 Jan 2020 13:12:47 +0200 Subject: mac80211: make ieee80211_wep_init() return void This function always returns 0, so there's no point in returning int. Make it void and remove the impossible error-path when calling it. 
Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-11-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/main.c | 5 +---- net/mac80211/wep.c | 4 +--- net/mac80211/wep.h | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 287dd0588476..d91bcef738dc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1185,10 +1185,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->hw.weight_multiplier) local->hw.weight_multiplier = 1; - result = ieee80211_wep_init(local); - if (result < 0) - wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", - result); + ieee80211_wep_init(local); local->hw.conf.flags = IEEE80211_CONF_IDLE; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index b75c2c54e665..9a6e11d7b4db 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -22,12 +22,10 @@ #include "wep.h" -int ieee80211_wep_init(struct ieee80211_local *local) +void ieee80211_wep_init(struct ieee80211_local *local) { /* start WEP IV from a random value */ get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); - - return 0; } static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index 997a034233c2..4ffe83554c67 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -13,7 +13,7 @@ #include "ieee80211_i.h" #include "key.h" -int ieee80211_wep_init(struct ieee80211_local *local); +void ieee80211_wep_init(struct ieee80211_local *local); int ieee80211_wep_encrypt_data(struct arc4_ctx *ctx, u8 *rc4key, size_t klen, u8 *data, size_t data_len); int ieee80211_wep_encrypt(struct ieee80211_local *local, -- cgit v1.2.3 From 8cadb207145c7e2fa45bbec2319cb84aec5da988 Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Fri, 31 Jan 2020 13:12:52 +0200 Subject: mac80211: update condition for HE disablement Disable HE if the beacon does not contain an HE operation IE. Signed-off-by: Daniel Gabay Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-16-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 152577cc2213..16b678bea106 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -313,8 +313,8 @@ out: ret |= ieee80211_chandef_downgrade(chandef); } - if (!cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, - IEEE80211_CHAN_NO_HE)) + if (!he_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + IEEE80211_CHAN_NO_HE)) ret |= IEEE80211_STA_DISABLE_HE; if (chandef->width != vht_chandef.width && !tracking) -- cgit v1.2.3 From 2ff69b0e25f4faa442f28b652a909f1b75206803 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:31:11 +0200 Subject: mac80211: remove supported channels element in 6 GHz if ECSA support We should not include the supported channels element if we have (advertise) support for extended channel switching. To avoid any interop issues because we always added it in the past, obey this restriction only in the (new) 6 GHz band. 
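In short, the Supported Channels element is now omitted only when we operate
on the 6 GHz band and our own Extended Capabilities advertise extended
channel switching; sketched, the element is still added when:

    if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT &&
        (sband->band != NL80211_BAND_6GHZ ||
         !ext_capa || ext_capa->datalen < 1 ||
         !(ext_capa->data[0] & WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING)))
            /* ... build WLAN_EID_SUPPORTED_CHANNELS as before ... */;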
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131113111.893106-1-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 16b678bea106..2733555a5f3b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -671,6 +671,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; u32 rates = 0; + struct element *ext_capa = NULL; + + /* we know it's writable, cast away the const */ + if (assoc_data->ie_len) + ext_capa = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, + assoc_data->ie, + assoc_data->ie_len); sdata_assert_lock(sdata); @@ -821,7 +828,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) *pos++ = ieee80211_chandef_max_power(&chanctx_conf->def); } - if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) { + /* + * Per spec, we shouldn't include the list of channels if we advertise + * support for extended channel switching, but we've always done that; + * (for now?) apply this restriction only on the (new) 6 GHz band. + */ + if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT && + (sband->band != NL80211_BAND_6GHZ || + !ext_capa || ext_capa->datalen < 1 || + !(ext_capa->data[0] & WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING))) { /* TODO: get this in reg domain format */ pos = skb_put(skb, 2 * sband->n_channels + 2); *pos++ = WLAN_EID_SUPPORTED_CHANNELS; @@ -835,18 +850,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* Set MBSSID support for HE AP if needed */ if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && assoc_data->ie_len) { - struct element *elem; - - /* we know it's writable, cast away the const */ - elem = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, - assoc_data->ie, - assoc_data->ie_len); - - /* We can probably assume both always true */ - if (elem && elem->datalen >= 3) - elem->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT; - } + !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && assoc_data->ie_len && + ext_capa && ext_capa->datalen >= 3) + ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT; /* if present, add any custom IEs that go before HT */ if (assoc_data->ie_len) { -- cgit v1.2.3 From cf2c9cc3980fb2aa5a71ebbe982739987100db76 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Jan 2020 13:12:41 +0200 Subject: mac80211: set station bandwidth from HE capability Set the station bandwidth in HE capability parsing and from HE capability as the HT/VHT information will not be present on the 6 GHz band. 
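For illustration (capability bits picked arbitrarily): a peer on the 5 or
6 GHz band that sets IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G in
phy_cap_info[0] now gets IEEE80211_STA_RX_BW_160 from
ieee80211_sta_cap_rx_bw() even if it sent no VHT capabilities at all; with
only IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G set the result
is IEEE80211_STA_RX_BW_80, and with none of the width bits set it falls back
to IEEE80211_STA_RX_BW_20.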
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-5-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/he.c | 4 ++++ net/mac80211/vht.c | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'net') diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 736da0035135..5245c19f39bf 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -3,6 +3,7 @@ * HE handling * * Copyright(c) 2017 Intel Deutschland GmbH + * Copyright(c) 2019 Intel Corporation */ #include "ieee80211_i.h" @@ -49,6 +50,9 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_ppe_size); he_cap->has_he = true; + + sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } void diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ccdcb9ad9ac7..a7cd22594a14 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -333,11 +333,33 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, } } +/* FIXME: move this to some better location - parses HE now */ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; u32 cap_width; + if (he_cap->has_he) { + u8 info = he_cap->he_cap_elem.phy_cap_info[0]; + + if (sta->sdata->vif.bss_conf.chandef.chan->band == + NL80211_BAND_2GHZ) { + if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) + return IEEE80211_STA_RX_BW_40; + else + return IEEE80211_STA_RX_BW_20; + } + + if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G || + info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) + return IEEE80211_STA_RX_BW_160; + else if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) + return IEEE80211_STA_RX_BW_80; + + return IEEE80211_STA_RX_BW_20; + } + if (!vht_cap->vht_supported) return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : @@ -433,6 +455,7 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) } } +/* FIXME: rename/move - this deals with everything not just VHT */ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; -- cgit v1.2.3 From f46209b9ff76e7a4194c40a2994c9f115d2227b5 Mon Sep 17 00:00:00 2001 From: Tova Mussai Date: Fri, 31 Jan 2020 13:12:45 +0200 Subject: mac80211: HE: set RX NSS In case of HE, the RX NSS is taken from the HE capabilities. If the supported NSS capabilities that are reported by AP for HE mode in the HE Capabilities element are different from the NSS capabilities that are reported by AP for the VHT mode in the VHT Capabilities element, use the lowest supported NSS to not get all the values confused. 
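A worked example with hypothetical MCS maps: if the peer reports
rx_mcs_80 = 0xfffa (two spatial streams with MCS 0-11, the remaining six
2-bit fields set to 3, i.e. not supported) and rx_mcs_160 = 0xfffe (a single
stream usable at 160 MHz), the loops below yield rx_mcs_80 = 2 and
rx_mcs_160 = 1, so he_rx_nss = min(2, 1) = 1; that value is then combined
with the HT/VHT derived NSS via max() as before.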
Signed-off-by: Tova Mussai Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-9-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a7cd22594a14..632f07401850 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -481,12 +481,40 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) void ieee80211_sta_set_rx_nss(struct sta_info *sta) { - u8 ht_rx_nss = 0, vht_rx_nss = 0; + u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, rx_nss; /* if we received a notification already don't overwrite it */ if (sta->sta.rx_nss) return; + if (sta->sta.he_cap.has_he) { + int i; + u8 rx_mcs_80 = 0, rx_mcs_160 = 0; + const struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + u16 mcs_160_map = + le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); + u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); + + for (i = 7; i >= 0; i--) { + u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3; + + if (mcs_160 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + rx_mcs_160 = i + 1; + break; + } + } + for (i = 7; i >= 0; i--) { + u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3; + + if (mcs_80 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + rx_mcs_80 = i + 1; + break; + } + } + + he_rx_nss = min(rx_mcs_80, rx_mcs_160); + } + if (sta->sta.ht_cap.ht_supported) { if (sta->sta.ht_cap.mcs.rx_mask[0]) ht_rx_nss++; @@ -516,8 +544,9 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) /* FIXME: consider rx_highest? */ } - ht_rx_nss = max(ht_rx_nss, vht_rx_nss); - sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); + rx_nss = max(vht_rx_nss, ht_rx_nss); + rx_nss = max(he_rx_nss, rx_nss); + sta->sta.rx_nss = max_t(u8, 1, rx_nss); } u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3 From 85b27ef73419db8d59a5d685bc62113883ca9330 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 31 Jan 2020 13:12:50 +0200 Subject: mac80211: Accept broadcast probe responses on 6GHz band An AP that operates on 6GHz may respond with a broadcast probe response. Don't ignore such frames. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-14-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2733555a5f3b..320a2b3b0f5a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3683,13 +3683,28 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_if_managed *ifmgd; struct ieee80211_rx_status *rx_status = (void *) skb->cb; + struct ieee80211_channel *channel; size_t baselen, len = skb->len; ifmgd = &sdata->u.mgd; sdata_assert_lock(sdata); - if (!ether_addr_equal(mgmt->da, sdata->vif.addr)) + /* + * According to Draft P802.11ax D6.0 clause 26.17.2.3.2: + * "If a 6 GHz AP receives a Probe Request frame and responds with + * a Probe Response frame [..], the Address 1 field of the Probe + * Response frame shall be set to the broadcast address [..]" + * So, on 6GHz band we should also accept broadcast responses. 
+ */ + channel = ieee80211_get_channel(sdata->local->hw.wiphy, + rx_status->freq); + if (!channel) + return; + + if (!ether_addr_equal(mgmt->da, sdata->vif.addr) && + (channel->band != NL80211_BAND_6GHZ || + !is_broadcast_ether_addr(mgmt->da))) return; /* ignore ProbeResp to foreign address */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; -- cgit v1.2.3 From c4d800dcc7c57837cca66638b54b1d8a09949f79 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Jan 2020 13:12:53 +0200 Subject: mac80211: Handle SMPS mode changes only in AP mode According to IEEE802.11 specifications the SM power save field in the HT capability IE and the HE extended capability IE is valid only in (re)association frames and should be ignored otherwise. Remove code paths that handled this also for non AP modes. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-17-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/he.c | 2 +- net/mac80211/ht.c | 42 ++++++++++++++++++++++++------------------ net/mac80211/rx.c | 6 +++++- 3 files changed, 30 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 5245c19f39bf..1087f715338b 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -3,7 +3,7 @@ * HE handling * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2019 Intel Corporation + * Copyright(c) 2019 - 2020 Intel Corporation */ #include "ieee80211_i.h" diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a2e4d6b8fd98..a8e144fd02f1 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,6 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH + * Copyright(c) 2020 Intel Corporation */ #include @@ -144,7 +145,6 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, int i, max_tx_streams; bool changed; enum ieee80211_sta_rx_bandwidth bw; - enum ieee80211_smps_mode smps_mode; memset(&ht_cap, 0, sizeof(ht_cap)); @@ -270,24 +270,30 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) - >> IEEE80211_HT_CAP_SM_PS_SHIFT) { - case WLAN_HT_CAP_SM_PS_INVALID: - case WLAN_HT_CAP_SM_PS_STATIC: - smps_mode = IEEE80211_SMPS_STATIC; - break; - case WLAN_HT_CAP_SM_PS_DYNAMIC: - smps_mode = IEEE80211_SMPS_DYNAMIC; - break; - case WLAN_HT_CAP_SM_PS_DISABLED: - smps_mode = IEEE80211_SMPS_OFF; - break; - } - - if (smps_mode != sta->sta.smps_mode) - changed = true; - sta->sta.smps_mode = smps_mode; + if (sta->sdata->vif.type == NL80211_IFTYPE_AP || + sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + enum ieee80211_smps_mode smps_mode; + + switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) + >> IEEE80211_HT_CAP_SM_PS_SHIFT) { + case WLAN_HT_CAP_SM_PS_INVALID: + case WLAN_HT_CAP_SM_PS_STATIC: + smps_mode = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_CAP_SM_PS_DYNAMIC: + smps_mode = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_CAP_SM_PS_DISABLED: + smps_mode = IEEE80211_SMPS_OFF; + break; + } + if (smps_mode != sta->sta.smps_mode) + changed = true; + sta->sta.smps_mode = smps_mode; + } else { + sta->sta.smps_mode = IEEE80211_SMPS_OFF; + } return changed; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 619c223f1cde..ec3a04a1db20 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include @@ -3082,6 +3082,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) enum ieee80211_smps_mode smps_mode; struct sta_opmode_info sta_opmode = {}; + if (sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN) + goto handled; + /* convert to HT capability */ switch (mgmt->u.action.u.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: -- cgit v1.2.3 From 52b4810bed836929d73e1ff419a8d3f1eb1b4c4b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Jan 2020 13:12:56 +0200 Subject: mac80211: Remove support for changing AP SMPS mode The SMPS feature is defined in the specification only to be used by non-AP stations and not by APs, so remove the support for changing the AP's SMPS mode dynamically. 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-20-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 101 ++---------------------------------------- net/mac80211/debugfs_netdev.c | 13 ++---- net/mac80211/ht.c | 22 --------- net/mac80211/ieee80211_i.h | 7 +-- net/mac80211/iface.c | 8 +--- net/mac80211/sta_info.c | 16 +------ 6 files changed, 10 insertions(+), 157 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a11bd1669c13..eb7a84150817 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -990,20 +990,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; - switch (params->smps_mode) { - case NL80211_SMPS_OFF: - sdata->smps_mode = IEEE80211_SMPS_OFF; - break; - case NL80211_SMPS_STATIC: - sdata->smps_mode = IEEE80211_SMPS_STATIC; - break; - case NL80211_SMPS_DYNAMIC: - sdata->smps_mode = IEEE80211_SMPS_DYNAMIC; - break; - default: - return -EINVAL; - } - sdata->u.ap.req_smps = sdata->smps_mode; + if (params->smps_mode != NL80211_SMPS_OFF) + return -ENOTSUPP; + + sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; @@ -1169,7 +1159,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) kfree_rcu(old_beacon, rcu_head); if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); - sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF; kfree(sdata->vif.bss_conf.ftmr_params); sdata->vif.bss_conf.ftmr_params = NULL; @@ -1694,20 +1683,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, mutex_unlock(&local->sta_mtx); - if ((sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && - sta->known_smps_mode != sta->sdata->bss->req_smps && - test_sta_flag(sta, WLAN_STA_AUTHORIZED) && - sta_info_tx_streams(sta) != 1) { - ht_dbg(sta->sdata, - "%pM just authorized and MIMO capable - update SMPS\n", - sta->sta.addr); - ieee80211_send_smps_action(sta->sdata, - sta->sdata->bss->req_smps, - sta->sta.addr, - sta->sdata->vif.bss_conf.bssid); - } - if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { ieee80211_recalc_ps(local); @@ -2639,74 +2614,6 @@ static int ieee80211_testmode_dump(struct wiphy *wiphy, } #endif -int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode) -{ - struct sta_info *sta; - enum ieee80211_smps_mode old_req; - - if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP)) - return -EINVAL; - - if (sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) - return 0; - - old_req = sdata->u.ap.req_smps; - sdata->u.ap.req_smps = smps_mode; - - /* AUTOMATIC doesn't mean much for AP - don't allow it */ - if (old_req == smps_mode || - smps_mode == IEEE80211_SMPS_AUTOMATIC) - return 0; - - ht_dbg(sdata, - "SMPS %d requested in AP mode, sending Action frame to %d stations\n", - smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta)); - - mutex_lock(&sdata->local->sta_mtx); - list_for_each_entry(sta, &sdata->local->sta_list, list) { - /* - * Only stations associated to our AP and - * associated VLANs - */ - if (sta->sdata->bss != &sdata->u.ap) - continue; - - /* This station doesn't support MIMO - skip it */ - if (sta_info_tx_streams(sta) == 1) - continue; - - /* - * Don't wake up a STA just to send the action frame - * unless we are getting more restrictive. 
- */ - if (test_sta_flag(sta, WLAN_STA_PS_STA) && - !ieee80211_smps_is_restrictive(sta->known_smps_mode, - smps_mode)) { - ht_dbg(sdata, "Won't send SMPS to sleeping STA %pM\n", - sta->sta.addr); - continue; - } - - /* - * If the STA is not authorized, wait until it gets - * authorized and the action frame will be sent then. - */ - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - continue; - - ht_dbg(sdata, "Sending SMPS to %pM\n", sta->sta.addr); - ieee80211_send_smps_action(sdata, smps_mode, sta->sta.addr, - sdata->vif.bss_conf.bssid); - } - mutex_unlock(&sdata->local->sta_mtx); - - sdata->smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps); - - return 0; -} - int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) { diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 64b544ae9966..3dbe7c5cefd1 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2006 Jiri Benc * Copyright 2007 Johannes Berg + * Copyright (C) 2020 Intel Corporation */ #include @@ -254,15 +255,11 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, smps_mode == IEEE80211_SMPS_AUTOMATIC)) return -EINVAL; - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; sdata_lock(sdata); - if (sdata->vif.type == NL80211_IFTYPE_STATION) - err = __ieee80211_request_smps_mgd(sdata, smps_mode); - else - err = __ieee80211_request_smps_ap(sdata, smps_mode); + err = __ieee80211_request_smps_mgd(sdata, smps_mode); sdata_unlock(sdata); return err; @@ -282,10 +279,6 @@ static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, return snprintf(buf, buflen, "request: %s\nused: %s\n", smps_modes[sdata->u.mgd.req_smps], smps_modes[sdata->smps_mode]); - if (sdata->vif.type == NL80211_IFTYPE_AP) - return snprintf(buf, buflen, "request: %s\nused: %s\n", - smps_modes[sdata->u.ap.req_smps], - smps_modes[sdata->smps_mode]); return -EINVAL; } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a8e144fd02f1..e32906202575 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -550,19 +550,6 @@ void ieee80211_request_smps_mgd_work(struct work_struct *work) sdata_unlock(sdata); } -void ieee80211_request_smps_ap_work(struct work_struct *work) -{ - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, - u.ap.request_smps_work); - - sdata_lock(sdata); - if (sdata_dereference(sdata->u.ap.beacon, sdata)) - __ieee80211_request_smps_ap(sdata, - sdata->u.ap.driver_smps_mode); - sdata_unlock(sdata); -} - void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode) { @@ -578,15 +565,6 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, sdata->u.mgd.driver_smps_mode = smps_mode; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.request_smps_work); - } else { - /* AUTOMATIC is meaningless in AP mode */ - if (WARN_ON_ONCE(smps_mode == IEEE80211_SMPS_AUTOMATIC)) - return; - if (sdata->u.ap.driver_smps_mode == smps_mode) - return; - sdata->u.ap.driver_smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.ap.request_smps_work); } } /* this might change ... 
don't want non-open drivers using it */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b89543269f51..7074af92b536 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #ifndef IEEE80211_I_H @@ -292,10 +292,7 @@ struct ieee80211_if_ap { struct ps_data ps; atomic_t num_mcast_sta; /* number of stations receiving multicast */ - enum ieee80211_smps_mode req_smps, /* requested smps mode */ - driver_smps_mode; /* smps mode request */ - struct work_struct request_smps_work; bool multicast_to_unicast; }; @@ -2148,8 +2145,6 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, enum nl80211_band band, u32 *basic_rates); int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); -int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode); void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 99d913a6e651..2fb26bc105f8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include #include @@ -824,9 +824,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_ADHOC: ieee80211_ibss_stop(sdata); break; - case NL80211_IFTYPE_AP: - cancel_work_sync(&sdata->u.ap.request_smps_work); - break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) break; @@ -1494,10 +1491,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); - INIT_WORK(&sdata->u.ap.request_smps_work, - ieee80211_request_smps_ap_work); sdata->vif.bss_conf.bssid = sdata->vif.addr; - sdata->u.ap.req_smps = IEEE80211_SMPS_OFF; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0f5f40678885..f357156b86ee 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include @@ -1351,20 +1351,6 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); - /* This station just woke up and isn't aware of our SMPS state */ - if (!ieee80211_vif_is_mesh(&sdata->vif) && - !ieee80211_smps_is_restrictive(sta->known_smps_mode, - sdata->smps_mode) && - sta->known_smps_mode != sdata->bss->req_smps && - sta_info_tx_streams(sta) != 1) { - ht_dbg(sdata, - "%pM just woke up and MIMO capable - update SMPS\n", - sta->sta.addr); - ieee80211_send_smps_action(sdata, sdata->bss->req_smps, - sta->sta.addr, - sdata->vif.bss_conf.bssid); - } - local->total_ps_buffered -= buffered; sta_info_recalc_tim(sta); -- cgit v1.2.3 From 
ff74c51e8f4c543ed2bd3bf1c2f3287b098660df Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 31 Jan 2020 13:45:29 +0200 Subject: cfg80211/mac80211: Allow user space to register for station Rx authentication To support Pre Association Security Negotiation (PASN) while already associated to one AP, allow user space to register to Rx authentication frames, so that the user space logic would be able to receive/handle authentication frames from a different AP as part of PASN. Note that it is expected that user space would intelligently register for Rx authentication frames, i.e., only when PASN is used and configure a match filter only for PASN authentication algorithm, as otherwise the MLME functionality of mac80211 would be broken. Additionally, since some versions of the user space daemons wrongly register to all types of authentication frames (which might result in unexpected behavior) allow such registration if the request is for a specific authentication algorithm number. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131114529.894206-1-luca@coelho.fi Signed-off-by: Johannes Berg --- net/mac80211/main.c | 13 +++++++++++++ net/wireless/core.h | 2 +- net/wireless/mlme.c | 33 +++++++++++++++++++++++++++++---- net/wireless/nl80211.c | 5 +++-- 4 files changed, 46 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d91bcef738dc..9dd3c9e3731f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -416,7 +416,20 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { }, [NL80211_IFTYPE_STATION] = { .tx = 0xffff, + /* + * To support Pre Association Security Negotiation (PASN) while + * already associated to one AP, allow user space to register to + * Rx authentication frames, so that the user space logic would + * be able to receive/handle authentication frames from a + * different AP as part of PASN. + * It is expected that user space would intelligently register + * for Rx authentication frames, i.e., only when PASN is used + * and configure a match filter only for PASN authentication + * algorithm, as otherwise the MLME functionality of mac80211 + * would be broken. 
+ */ .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4), }, [NL80211_IFTYPE_AP] = { diff --git a/net/wireless/core.h b/net/wireless/core.h index ed487e324571..bb897a803ffe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -385,7 +385,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, - int match_len); + int match_len, struct netlink_ext_ack *extack); void cfg80211_mlme_unreg_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index f9462010575f..e4805a3bd310 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,6 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen * Copyright (c) 2015 Intel Deutschland GmbH + * Copyright (C) 2019 Intel Corporation */ #include @@ -470,7 +471,7 @@ void cfg80211_mlme_unreg_wk(struct work_struct *wk) int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, - int match_len) + int match_len, struct netlink_ext_ack *extack) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -481,15 +482,38 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, if (!wdev->wiphy->mgmt_stypes) return -EOPNOTSUPP; - if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) { + NL_SET_ERR_MSG(extack, "frame type not management"); return -EINVAL; + } - if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) + if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) { + NL_SET_ERR_MSG(extack, "Invalid frame type"); return -EINVAL; + } mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4; - if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) { + NL_SET_ERR_MSG(extack, + "Registration to specific type not supported"); + return -EINVAL; + } + + /* + * To support Pre Association Security Negotiation (PASN), registration + * for authentication frames should be supported. However, as some + * versions of the user space daemons wrongly register to all types of + * authentication frames (which might result in unexpected behavior) + * allow such registration if the request is for a specific + * authentication algorithm number. 
+ */ + if (wdev->iftype == NL80211_IFTYPE_STATION && + (frame_type & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_AUTH && + !(match_data && match_len >= 2)) { + NL_SET_ERR_MSG(extack, + "Authentication algorithm number required"); return -EINVAL; + } nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); if (!nreg) @@ -504,6 +528,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, continue; if (memcmp(reg->match, match_data, mlen) == 0) { + NL_SET_ERR_MSG(extack, "Match already configured"); err = -EALREADY; break; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3ad937d0a256..4c0ea54e0f59 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10624,8 +10624,9 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, - nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), - nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); + nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), + nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]), + info->extack); } static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) -- cgit v1.2.3 From c0058df73309906ef4d5383fbaa10c43ebddc48a Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 31 Jan 2020 13:12:57 +0200 Subject: mac80211: parse also the RSNXE IE Parse also the RSN Extension IE when parsing the rest of the IEs. It will be used in a later patch. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-21-luca@coelho.fi Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/util.c | 7 ++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 095a7108c394..6f3e7c5c600a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2532,6 +2532,7 @@ enum ieee80211_eid { WLAN_EID_FILS_INDICATION = 240, WLAN_EID_DILS = 241, WLAN_EID_FRAGMENT = 242, + WLAN_EID_RSNX = 244, WLAN_EID_EXTENSION = 255 }; @@ -3421,4 +3422,11 @@ static inline bool for_each_element_completed(const struct element *element, return (const u8 *)element == (const u8 *)data + datalen; } +/** + * RSNX Capabilities: + * bits 0-3: Field length (n-1) + */ +#define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4) +#define WLAN_RSNX_CAPA_SAE_H2E BIT(5) + #endif /* LINUX_IEEE80211_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7074af92b536..8a49d78ad7c9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1472,6 +1472,7 @@ struct ieee802_11_elems { const struct ieee80211_tim_ie *tim; const u8 *challenge; const u8 *rsn; + const u8 *rsnx; const u8 *erp_info; const u8 *ext_supp_rates; const u8 *wmm_info; @@ -1519,6 +1520,7 @@ struct ieee802_11_elems { u8 tim_len; u8 challenge_len; u8 rsn_len; + u8 rsnx_len; u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72039c8dbc38..7ddf0508779f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * utilities for mac80211 */ @@ -994,6 +994,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, 
case WLAN_EID_CHAN_SWITCH_TIMING: case WLAN_EID_LINK_ID: case WLAN_EID_BSS_MAX_IDLE_PERIOD: + case WLAN_EID_RSNX: /* * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible * that if the content gets bigger it might be needed more than once @@ -1264,6 +1265,10 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, if (elen >= sizeof(*elems->max_idle_period_ie)) elems->max_idle_period_ie = (void *)pos; break; + case WLAN_EID_RSNX: + elems->rsnx = pos; + elems->rsnx_len = elen; + break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? &crc : NULL, -- cgit v1.2.3 From 8c3ed7aa2b9ef666195b789e9b02e28383243fa8 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Wed, 15 Jan 2020 13:55:22 +0100 Subject: nl80211: add src and dst addr attributes for control port tx/rx When using control port over nl80211 in AP mode with pre-authentication, APs need to forward frames to other APs defined by their MAC address. Before this patch, pre-auth frames reaching user space over nl80211 control port have no longer any information about the dest attached, which can be used for forwarding to a controller or injecting the frame back to a ethernet interface over a AF_PACKET socket. Analog problems exist, when forwarding pre-auth frames from AP -> STA. This patch therefore adds the NL80211_ATTR_DST_MAC and NL80211_ATTR_SRC_MAC attributes to provide more context information when forwarding. The respective arguments are optional on tx and included on rx. Therefore unaware existing software is not affected. Software which wants to detect this feature, can do so by checking against: NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200115125522.3755-1-markus.theil@tu-ilmenau.de [split into separate cfg80211/mac80211 patches] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- include/uapi/linux/nl80211.h | 16 +++++++++++++++- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/tx.c | 3 ++- net/wireless/nl80211.c | 18 +++++++++++++++--- net/wireless/rdev-ops.h | 8 ++++---- net/wireless/trace.h | 27 +++++++++++++++++---------- 7 files changed, 57 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c3a9b375d3ca..ec0de3c43c61 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3969,7 +3969,8 @@ struct cfg80211_ops { int (*tx_control_port)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const __be16 proto, + const u8 *dest, const u8 *src, + const __be16 proto, const bool noencrypt); int (*get_ftm_responder_stats)(struct wiphy *wiphy, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 350ab86fe20e..158bccb4a47b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1039,11 +1039,14 @@ * a control port frame and as a notification that a control port frame * has been received. %NL80211_ATTR_FRAME is used to specify the * frame contents. The frame is the raw EAPoL data, without ethernet or - * 802.11 headers. + * 802.11 headers. An optional %NL80211_ATTR_SRC_MAC can be used to send + * pre-auth frames to STAs on behalf of other APs. * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added * indicating the protocol type of the received frame; whether the frame * was received unencrypted and the MAC address of the peer respectively. 
+ * %NL80211_ATTR_DST_MAC can be used to forward pre-auth frames in + * userspace while using AP mode. * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * @@ -2409,6 +2412,9 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * + * @NL80211_ATTR_SRC_MAC: MAC address used in control port over nl80211 transmit + * @NL80211_ATTR_DST_MAC: MAC address used in control port over nl80211 receive + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2877,6 +2883,9 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, + NL80211_ATTR_SRC_MAC, + NL80211_ATTR_DST_MAC, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5539,6 +5548,10 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS: The driver + * can use src and dst MAC addresses with control port over nl80211 rx + * and tx operations. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5586,6 +5599,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8a49d78ad7c9..da9eaa9ee37e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1792,7 +1792,8 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted); + const u8 *dest, const u8 *src, __be16 proto, + bool unencrypted); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4296d9d71311..059c66b6bb5c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5285,7 +5285,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted) + const u8 *dest, const u8 *src, __be16 proto, + bool unencrypted) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c0ea54e0f59..33fe6ac1c242 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -633,6 +633,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), + [NL80211_ATTR_SRC_MAC] = NLA_POLICY_ETH_ADDR, + [NL80211_ATTR_DST_MAC] = NLA_POLICY_ETH_ADDR, }; /* policy for the key attributes */ @@ -13694,6 +13696,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info 
*info) const u8 *buf; size_t len; u8 *dest; + u8 src[ETH_ALEN]; u16 proto; bool noencrypt; int err; @@ -13731,6 +13734,13 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) goto out; } + /* copy src address under wdev_lock, as we may copy wdev_address */ + if (info->attrs[NL80211_ATTR_SRC_MAC]) + ether_addr_copy(src, + nla_data(info->attrs[NL80211_ATTR_SRC_MAC])); + else + ether_addr_copy(src, wdev_address(wdev)); + wdev_unlock(wdev); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); @@ -13741,7 +13751,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); return rdev_tx_control_port(rdev, dev, buf, len, - dest, cpu_to_be16(proto), noencrypt); + dest, src, cpu_to_be16(proto), noencrypt); out: wdev_unlock(wdev); @@ -15996,7 +16006,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ethhdr *ehdr = eth_hdr(skb); - const u8 *addr = ehdr->h_source; + const u8 *daddr = ehdr->h_dest; + const u8 *saddr = ehdr->h_source; u16 proto = be16_to_cpu(skb->protocol); struct sk_buff *msg; void *hdr; @@ -16021,7 +16032,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, saddr) || + nla_put(msg, NL80211_ATTR_DST_MAC, ETH_ALEN, daddr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e853a4fe6f97..39e6c1db3092 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -730,14 +730,14 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, - const u8 *dest, __be16 proto, - const bool noencrypt) + const u8 *dest, const u8 *src, + __be16 proto, const bool noencrypt) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, - dest, proto, noencrypt); + dest, src, proto, noencrypt); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, - dest, proto, noencrypt); + dest, src, proto, noencrypt); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d98ad2b3143b..fefa255fd062 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1923,27 +1923,31 @@ TRACE_EVENT(rdev_mgmt_tx, TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *buf, size_t len, const u8 *dest, __be16 proto, + const u8 *buf, size_t len, + const u8 *dest, const u8 *src, __be16 proto, bool unencrypted), - TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted), + TP_ARGS(wiphy, netdev, buf, len, dest, src, proto, unencrypted), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) - __field(__be16, proto) + MAC_ENTRY(src) + __field(u16, proto) __field(bool, unencrypted) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); - __entry->proto = proto; + MAC_ASSIGN(src, src); + __entry->proto = be16_to_cpu(proto); __entry->unencrypted = unencrypted; ), - TP_printk(WIPHY_PR_FMT ", 
" NETDEV_PR_FMT ", " MAC_PR_FMT "," - " proto: 0x%x, unencrypted: %s", - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest), - be16_to_cpu(__entry->proto), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", dest: " MAC_PR_FMT + ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, + MAC_PR_ARG(dest), MAC_PR_ARG(src), + __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); @@ -2835,6 +2839,7 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) + MAC_ENTRY(to) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) @@ -2842,12 +2847,14 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_fast_assign( NETDEV_ASSIGN; __entry->len = skb->len; + MAC_ASSIGN(to, eth_hdr(skb)->h_dest); MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; ), - TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from), + TP_printk(NETDEV_PR_FMT ", len=%d, dest: " MAC_PR_FMT + ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(to), MAC_PR_ARG(from), __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); -- cgit v1.2.3 From 9b125c27998719288e4dcf2faf54511039526692 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Fri, 7 Feb 2020 12:58:57 +0100 Subject: mac80211: support NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS This is now a trivial patch, but for seeing the actual changes I (Johannes) split it out from the original. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200115125522.3755-1-markus.theil@tu-ilmenau.de [split into separate cfg80211/mac80211 patches] Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9dd3c9e3731f..265a31a8104d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -587,6 +587,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS); if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 059c66b6bb5c..a447d258fea3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5317,7 +5317,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, ehdr = skb_push(skb, sizeof(struct ethhdr)); memcpy(ehdr->h_dest, dest, ETH_ALEN); - memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN); + memcpy(ehdr->h_source, src, ETH_ALEN); ehdr->h_proto = proto; skb->dev = dev; -- cgit v1.2.3 From db6d9e9e8b48b7ab68c61553eb5fa68534dd0fde Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 17 Dec 2019 10:30:57 -0800 Subject: mac80211: Fix setting txpower to zero With multiple VIFS ath10k, and probably others, tries to find the minimum txpower for all vifs and uses that when setting txpower in the firmware. If a second vif is added and starts to scan, it's txpower is not initialized yet and it set to zero. ath10k had a patch to ignore zero values, but then it is impossible to actually set txpower to zero. So, instead initialize the txpower to INT_MIN in mac80211, and let drivers know that means the power has not been set and so should be ignored. 
This should fix regression in: commit 88407beb1b1462f706a1950a355fd086e1c450b6 Author: Ryan Hsu Date: Tue Dec 13 14:55:19 2016 -0800 ath10k: fix incorrect txpower set by P2P_DEVICE interface Tested on ath10k 9984 with ath10k-ct firmware. Signed-off-by: Ben Greear Link: https://lore.kernel.org/r/20191217183057.24586-1-greearb@candelatech.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 3 ++- drivers/net/wireless/ath/ath9k/main.c | 3 +++ drivers/net/wireless/ath/ath9k/xmit.c | 7 +++++-- include/net/mac80211.h | 2 +- net/mac80211/iface.c | 1 + net/mac80211/main.c | 2 ++ 6 files changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 7fee35ff966b..2f820d4eb308 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5103,7 +5103,8 @@ static int ath10k_mac_txpower_recalc(struct ath10k *ar) lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { - if (arvif->txpower <= 0) + /* txpower not initialized yet? */ + if (arvif->txpower == INT_MIN) continue; if (txpower == -1) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 0548aa3702e3..983b6d6c89f0 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1196,6 +1196,9 @@ static void ath9k_tpc_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { int *power = data; + if (vif->bss_conf.txpower == INT_MIN) + return; + if (*power < vif->bss_conf.txpower) *power = vif->bss_conf.txpower; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 31e7b108279c..e60d4737fc6e 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2095,10 +2095,13 @@ static void setup_frame_info(struct ieee80211_hw *hw, if (tx_info->control.vif) { struct ieee80211_vif *vif = tx_info->control.vif; - + if (vif->bss_conf.txpower == INT_MIN) + goto nonvifpower; txpower = 2 * vif->bss_conf.txpower; } else { - struct ath_softc *sc = hw->priv; + struct ath_softc *sc; + nonvifpower: + sc = hw->priv; txpower = sc->cur_chan->cur_txpower; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6d4ea71523d2..7287ad3034dd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -574,7 +574,7 @@ struct ieee80211_ftm_responder_params { * @ssid: The SSID of the current vif. Valid in AP and IBSS mode. * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. - * @txpower: TX power in dBm + * @txpower: TX power in dBm. INT_MIN means not configured. * @txpower_type: TX power adjustment used to control per packet Transmit * Power Control (TPC) in lower driver for the current vif. 
In particular * TPC is enabled if value passed in %txpower_type is diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2fb26bc105f8..3c00408e9c8c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1465,6 +1465,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_no_encrypt = false; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; sdata->vif.bss_conf.idle = true; + sdata->vif.bss_conf.txpower = INT_MIN; /* unset */ sdata->noack_map = 0; sdata->hw_80211_encap = false; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 265a31a8104d..cae3a34d3503 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -146,6 +146,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) continue; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; + if (sdata->vif.bss_conf.txpower == INT_MIN) + continue; power = min(power, sdata->vif.bss_conf.txpower); } rcu_read_unlock(); -- cgit v1.2.3 From 1f6e0baa703d31002c312c3e423c108b04325df0 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Tue, 11 Feb 2020 13:26:04 +0100 Subject: mac80211: allow setting queue_len for drivers not using wake_tx_queue Currently a mac80211 driver can only set the txq_limit when using wake_tx_queue. Not all drivers use wake_tx_queue. This patch adds a new element to wiphy allowing a driver to set a custom tx_queue_len and the code that will apply it in case it is set. The current default is 1000 which is too low for ath11k when doing HE rates. Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200211122605.13002-1-john@phrozen.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/mac80211/iface.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ec0de3c43c61..992bf0de6b9b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4739,6 +4739,8 @@ struct wiphy { u32 txq_memory_limit; u32 txq_quantum; + unsigned long tx_queue_len; + u8 support_mbssid:1, support_only_he_mbssid:1; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3c00408e9c8c..128b3468d13e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1834,6 +1834,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if_setup, txqs, 1); if (!ndev) return -ENOMEM; + + if (!local->ops->wake_tx_queue && local->hw.wiphy->tx_queue_len) + ndev->tx_queue_len = local->hw.wiphy->tx_queue_len; + dev_net_set(ndev, wiphy_net(local->hw.wiphy)); ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); -- cgit v1.2.3 From cee5f20fece32cd1722230cb05333f39db860698 Mon Sep 17 00:00:00 2001 From: Howard Chung Date: Fri, 14 Feb 2020 19:16:41 +0800 Subject: Bluetooth: secure bluetooth stack from bluedump attack Attack scenario: 1. A Chromebook (let's call this device A) is paired to a legitimate Bluetooth classic device (e.g. a speaker) (let's call this device B). 2. A malicious device (let's call this device C) pretends to be the Bluetooth speaker by using the same BT address. 3. If device A is not currently connected to device B, device A will be ready to accept connection from device B in the background (technically, doing Page Scan). 4. Therefore, device C can initiate connection to device A (because device A is doing Page Scan) and device A will accept the connection because device A trusts device C's address which is the same as device B's address. 5. 
Device C won't be able to communicate at any high level Bluetooth profile with device A because device A enforces that device C is encrypted with their common Link Key, which device C doesn't have. But device C can initiate pairing with device A with just-works model without requiring user interaction (there is only pairing notification). After pairing, device A now trusts device C with a new different link key, common between device A and C. 6. From now on, device A trusts device C, so device C can at anytime connect to device A to do any kind of high-level hijacking, e.g. speaker hijack or mouse/keyboard hijack. Since we don't know whether the repairing is legitimate or not, leave the decision to user space if all the conditions below are met. - the pairing is initialized by peer - the authorization method is just-work - host already had the link key to the peer Signed-off-by: Howard Chung Acked-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 10 ++++++++++ net/bluetooth/smp.c | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6ddc4a74a5e4..591e7477e925 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4557,6 +4557,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, goto confirm; } + /* If there already exists link key in local host, leave the + * decision to user space since the remote device could be + * legitimate or malicious. + */ + if (hci_find_link_key(hdev, &ev->bdaddr)) { + bt_dev_dbg(hdev, "Local host already has link key"); + confirm_hint = 1; + goto confirm; + } + BT_DBG("Auto-accept of user confirmation with %ums delay", hdev->auto_accept_delay); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 83449a88a182..50e0ac692ec4 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2168,6 +2168,25 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); + + /* Only Just-Works pairing requires extra checks */ + if (smp->method != JUST_WORKS) + goto mackey_and_ltk; + + /* If there already exists long term key in local host, leave + * the decision to user space since the remote device could + * be legitimate or malicious. + */ + if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, + hcon->role)) { + err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, + hcon->type, + hcon->dst_type, + passkey, 1); + if (err) + return SMP_UNSPECIFIED; + set_bit(SMP_FLAG_WAIT_USER, &smp->flags); + } } mackey_and_ltk: -- cgit v1.2.3 From eab2404ba798a8efda2a970f44071c3406d94e57 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 14 Feb 2020 10:08:57 -0800 Subject: Bluetooth: Add BT_PHY socket option This adds BT_PHY socket option (read-only) which can be used to read the PHYs in use by the underline connection. 
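A minimal userspace sketch of querying the new option on an already-connected
L2CAP or SCO socket (not part of this patch; it assumes the BT_PHY and
BT_PHY_* defines added above are visible via the installed bluetooth headers,
otherwise they can be copied locally):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <bluetooth/bluetooth.h>

    /* Print a few of the PHY bits in use by a connected socket. */
    static int print_phys(int sk)
    {
        unsigned int phys = 0;
        socklen_t len = sizeof(phys);

        if (getsockopt(sk, SOL_BLUETOOTH, BT_PHY, &phys, &len) < 0)
            return -1;

        if (phys & BT_PHY_BR_1M_1SLOT)
            printf("BR 1M, 1-slot packets\n");
        if (phys & BT_PHY_EDR_2M_1SLOT)
            printf("EDR 2M, 1-slot packets\n");
        if (phys & (BT_PHY_LE_2M_TX | BT_PHY_LE_2M_RX))
            printf("LE 2M\n");
        /* ...the remaining BT_PHY_* bits are tested the same way */
        return 0;
    }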
Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 17 ++++++ include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_conn.c | 107 ++++++++++++++++++++++++++++++++++++++ net/bluetooth/l2cap_sock.c | 13 +++++ net/bluetooth/sco.c | 13 +++++ 5 files changed, 152 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e42bb8e03c09..1576353a2773 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -121,6 +121,23 @@ struct bt_voice { #define BT_SNDMTU 12 #define BT_RCVMTU 13 +#define BT_PHY 14 + +#define BT_PHY_BR_1M_1SLOT 0x00000001 +#define BT_PHY_BR_1M_3SLOT 0x00000002 +#define BT_PHY_BR_1M_5SLOT 0x00000004 +#define BT_PHY_EDR_2M_1SLOT 0x00000008 +#define BT_PHY_EDR_2M_3SLOT 0x00000010 +#define BT_PHY_EDR_2M_5SLOT 0x00000020 +#define BT_PHY_EDR_3M_1SLOT 0x00000040 +#define BT_PHY_EDR_3M_3SLOT 0x00000080 +#define BT_PHY_EDR_3M_5SLOT 0x00000100 +#define BT_PHY_LE_1M_TX 0x00000200 +#define BT_PHY_LE_1M_RX 0x00000400 +#define BT_PHY_LE_2M_TX 0x00000800 +#define BT_PHY_LE_2M_RX 0x00001000 +#define BT_PHY_LE_CODED_TX 0x00002000 +#define BT_PHY_LE_CODED_RX 0x00004000 __printf(1, 2) void bt_info(const char *fmt, ...); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89ecf0a80aa1..dcc0dc6e2624 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1477,6 +1477,8 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); +u32 hci_conn_get_phy(struct hci_conn *conn); + /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 87691404d0c6..65fa44cbe514 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1725,3 +1725,110 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle) return hchan; } + +u32 hci_conn_get_phy(struct hci_conn *conn) +{ + u32 phys = 0; + + hci_dev_lock(conn->hdev); + + /* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471: + * Table 6.2: Packets defined for synchronous, asynchronous, and + * CSB logical transport types. + */ + switch (conn->type) { + case SCO_LINK: + /* SCO logical transport (1 Mb/s): + * HV1, HV2, HV3 and DV. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + break; + + case ACL_LINK: + /* ACL logical transport (1 Mb/s) ptt=0: + * DH1, DM3, DH3, DM5 and DH5. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (conn->pkt_type & (HCI_DM3 | HCI_DH3)) + phys |= BT_PHY_BR_1M_3SLOT; + + if (conn->pkt_type & (HCI_DM5 | HCI_DH5)) + phys |= BT_PHY_BR_1M_5SLOT; + + /* ACL logical transport (2 Mb/s) ptt=1: + * 2-DH1, 2-DH3 and 2-DH5. + */ + if (!(conn->pkt_type & HCI_2DH1)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & HCI_2DH3)) + phys |= BT_PHY_EDR_2M_3SLOT; + + if (!(conn->pkt_type & HCI_2DH5)) + phys |= BT_PHY_EDR_2M_5SLOT; + + /* ACL logical transport (3 Mb/s) ptt=1: + * 3-DH1, 3-DH3 and 3-DH5. 
+ */ + if (!(conn->pkt_type & HCI_3DH1)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & HCI_3DH3)) + phys |= BT_PHY_EDR_3M_3SLOT; + + if (!(conn->pkt_type & HCI_3DH5)) + phys |= BT_PHY_EDR_3M_5SLOT; + + break; + + case ESCO_LINK: + /* eSCO logical transport (1 Mb/s): EV3, EV4 and EV5 */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (!(conn->pkt_type & (ESCO_EV4 | ESCO_EV5))) + phys |= BT_PHY_BR_1M_3SLOT; + + /* eSCO logical transport (2 Mb/s): 2-EV3, 2-EV5 */ + if (!(conn->pkt_type & ESCO_2EV3)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & ESCO_2EV5)) + phys |= BT_PHY_EDR_2M_3SLOT; + + /* eSCO logical transport (3 Mb/s): 3-EV3, 3-EV5 */ + if (!(conn->pkt_type & ESCO_3EV3)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & ESCO_3EV5)) + phys |= BT_PHY_EDR_3M_3SLOT; + + break; + + case LE_LINK: + if (conn->le_tx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_RX; + + break; + } + + hci_dev_unlock(conn->hdev); + + return phys; +} diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 390a9afab647..9fb47b2b13c9 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -499,6 +499,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct bt_security sec; struct bt_power pwr; + u32 phys; int len, err = 0; BT_DBG("sk %p", sk); @@ -603,6 +604,18 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_PHY: + if (sk->sk_state == BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(chan->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b91d6b440fdf..29ab3e12fb46 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -922,6 +922,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int len, err = 0; struct bt_voice voice; + u32 phys; BT_DBG("sk %p", sk); @@ -956,6 +957,18 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; + case BT_PHY: + if (sk->sk_state == BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(sco_pi(sk)->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; -- cgit v1.2.3 From edddb36644d56efc5833405bdf1c46f85677fc5c Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 12 Feb 2020 18:29:27 +0100 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 692306df7b6f..2a234d0ad445 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2020.0" +#define BATADV_SOURCE_VERSION "2020.1" #endif /* B.A.T.M.A.N. 
parameters */ -- cgit v1.2.3 From 1e5946f5f7fe9267c71097a83615a6e5eb0f4cfd Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Fri, 14 Feb 2020 17:18:26 +0800 Subject: net: x25: convert to list_for_each_entry_safe() Use list_for_each_entry_safe() instead of list_for_each_safe() to simplify the code. Signed-off-by: chenqiwu Signed-off-by: David S. Miller --- net/x25/x25_forward.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index c82999941d3f..d48ad6d29197 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -131,13 +131,11 @@ out: void x25_clear_forward_by_lci(unsigned int lci) { - struct x25_forward *fwd; - struct list_head *entry, *tmp; + struct x25_forward *fwd, *tmp; write_lock_bh(&x25_forward_list_lock); - list_for_each_safe(entry, tmp, &x25_forward_list) { - fwd = list_entry(entry, struct x25_forward, node); + list_for_each_entry_safe(fwd, tmp, &x25_forward_list, node) { if (fwd->lci == lci) { list_del(&fwd->node); kfree(fwd); @@ -149,13 +147,11 @@ void x25_clear_forward_by_lci(unsigned int lci) void x25_clear_forward_by_dev(struct net_device *dev) { - struct x25_forward *fwd; - struct list_head *entry, *tmp; + struct x25_forward *fwd, *tmp; write_lock_bh(&x25_forward_list_lock); - list_for_each_safe(entry, tmp, &x25_forward_list) { - fwd = list_entry(entry, struct x25_forward, node); + list_for_each_entry_safe(fwd, tmp, &x25_forward_list, node) { if ((fwd->dev1 == dev) || (fwd->dev2 == dev)){ list_del(&fwd->node); kfree(fwd); -- cgit v1.2.3 From df12eb6d6cd920ab2f0e0a43cd6e1c23a05cea91 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Fri, 14 Feb 2020 12:48:01 +0100 Subject: net: virtio_vsock: Enhance connection semantics Whenever the vsock backend on the host sends a packet through the RX queue, it expects an answer on the TX queue. Unfortunately, there is one case where the host side will hang waiting for the answer and might effectively never recover if no timeout mechanism was implemented. This issue happens when the guest side starts binding to the socket, which insert a new bound socket into the list of already bound sockets. At this time, we expect the guest to also start listening, which will trigger the sk_state to move from TCP_CLOSE to TCP_LISTEN. The problem occurs if the host side queued a RX packet and triggered an interrupt right between the end of the binding process and the beginning of the listening process. In this specific case, the function processing the packet virtio_transport_recv_pkt() will find a bound socket, which means it will hit the switch statement checking for the sk_state, but the state won't be changed into TCP_LISTEN yet, which leads the code to pick the default statement. This default statement will only free the buffer, while it should also respond to the host side, by sending a packet on its TX queue. In order to simply fix this unfortunate chain of events, it is important that in case the default statement is entered, and because at this stage we know the host side is waiting for an answer, we must send back a packet containing the operation VIRTIO_VSOCK_OP_RST. One could say that a proper timeout mechanism on the host side will be enough to avoid the backend to hang. But the point of this patch is to ensure the normal use case will be provided with proper responsiveness when it comes to establishing the connection. Signed-off-by: Sebastien Boeuf Signed-off-by: David S. 
Miller --- net/vmw_vsock/virtio_transport_common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d9f0c9c5425a..2f696124bab6 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1153,6 +1153,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, virtio_transport_free_pkt(pkt); break; default: + (void)virtio_transport_reset_no_sock(t, pkt); virtio_transport_free_pkt(pkt); break; } -- cgit v1.2.3 From c8856c051454909e5059df4e81c77b9c366c5515 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 14 Feb 2020 15:30:49 -0800 Subject: tcp-zerocopy: Return inq along with tcp receive zerocopy. This patchset is intended to reduce the number of extra system calls imposed by TCP receive zerocopy. For ping-pong RPC style workloads, this patchset has demonstrated a system call reduction of about 30% when coupled with userspace changes. For applications using edge-triggered epoll, returning inq along with the result of tcp receive zerocopy could remove the need to call recvmsg()=-EAGAIN after a successful zerocopy. Generally speaking, since normally we would need to perform a recvmsg() call for every successful small RPC read via TCP receive zerocopy, returning inq can reduce the number of system calls performed by approximately half. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fd9eb8f6bcae..548f480b9c66 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -345,5 +345,6 @@ struct tcp_zerocopy_receive { __u64 address; /* in: address of mapping */ __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ + __u32 inq; /* out: amount of bytes in read queue */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb2d80519f8e..a697f1455f8d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3667,13 +3667,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - if (len != sizeof(zc)) + if (len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; + if (len > sizeof(zc)) + len = sizeof(zc); if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); + switch (len) { + case sizeof(zc): + case offsetofend(struct tcp_zerocopy_receive, inq): + goto zerocopy_rcv_inq; + case offsetofend(struct tcp_zerocopy_receive, length): + default: + goto zerocopy_rcv_out; + } +zerocopy_rcv_inq: + zc.inq = tcp_inq_hint(sk); +zerocopy_rcv_out: if (!err && copy_to_user(optval, &zc, len)) err = -EFAULT; return err; -- cgit v1.2.3 From 33946518d493cdf10aedb4a483f1aa41948a3dab Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 14 Feb 2020 15:30:50 -0800 Subject: tcp-zerocopy: Return sk_err (if set) along with tcp receive zerocopy. This patchset is intended to reduce the number of extra system calls imposed by TCP receive zerocopy. For ping-pong RPC style workloads, this patchset has demonstrated a system call reduction of about 30% when coupled with userspace changes. 
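The intended userspace usage is roughly the following sketch (illustrative
only; it assumes 'addr'/'len' describe a region previously mmap()ed from the
TCP socket, and a kernel carrying this and the previous patch so that the inq
and err output fields exist):

    #include <linux/tcp.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    /* Sketch: one zerocopy receive; the caller advertises via optlen which
     * output fields it understands, so older binaries keep working.
     */
    static int zerocopy_read_once(int fd, void *addr, unsigned int len)
    {
        struct tcp_zerocopy_receive zc = {
            .address = (__u64)(unsigned long)addr,
            .length  = len,
        };
        socklen_t zc_len = sizeof(zc);

        if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len))
            return -1;

        /* zc.length bytes are now readable at 'addr'; zc.recv_skip_hint bytes
         * must still be fetched with recvmsg().  If zc.inq and zc.err are both
         * zero, no follow-up recvmsg() call is needed for this RPC.
         */
        return zc.length;
    }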
For applications using epoll, returning sk_err along with the result of tcp receive zerocopy could remove the need to call recvmsg()=-EAGAIN after a spurious wakeup. Consider a multi-threaded application using epoll. A thread may awaken with EPOLLIN but another thread may already be reading. The spuriously-awoken thread does not necessarily know that another thread 'won'; rather, it may be possible that it was woken up due to the presence of an error if there is no data. A zerocopy read receiving 0 bytes thus would need to be followed up by recvmsg to be sure. Instead, we return sk_err directly with zerocopy, so the application can avoid this extra system call. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 548f480b9c66..1a7fc856e237 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -346,5 +346,6 @@ struct tcp_zerocopy_receive { __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ __u32 inq; /* out: amount of bytes in read queue */ + __s32 err; /* out: socket error */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a697f1455f8d..1b685485a5b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3676,14 +3676,20 @@ static int do_tcp_getsockopt(struct sock *sk, int level, lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); + if (len == sizeof(zc)) + goto zerocopy_rcv_sk_err; switch (len) { - case sizeof(zc): + case offsetofend(struct tcp_zerocopy_receive, err): + goto zerocopy_rcv_sk_err; case offsetofend(struct tcp_zerocopy_receive, inq): goto zerocopy_rcv_inq; case offsetofend(struct tcp_zerocopy_receive, length): default: goto zerocopy_rcv_out; } +zerocopy_rcv_sk_err: + if (!err) + zc.err = sock_error(sk); zerocopy_rcv_inq: zc.inq = tcp_inq_hint(sk); zerocopy_rcv_out: -- cgit v1.2.3 From 744676e777207f4992ba4cc728a8a71352963c5b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 15 Feb 2020 14:20:56 +0100 Subject: openvswitch: add TTL decrement action New action to decrement TTL instead of setting it to a fixed value. This action will decrement the TTL and, in case of expired TTL, drop it or execute an action passed via a nested attribute. The default TTL expired action is to drop the packet. Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively. 
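The only subtle part of the datapath change is the IPv4 case: the TTL shares a
16-bit checksummed word with the protocol field, so the header checksum has to
be patched incrementally when the TTL is decremented (IPv6 carries no header
checksum). A sketch of that step, as done by execute_dec_ttl() below, where nh
points at the packet's IPv4 header:

    if (nh->ttl <= 1)
        return -EHOSTUNREACH;   /* expired: nested action runs, default drop */

    old_ttl = nh->ttl--;
    /* ttl is the high byte of the ttl/protocol word, hence the << 8 */
    csum_replace2(&nh->check, htons(old_ttl << 8), htons(nh->ttl << 8));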
Tested with a corresponding change in the userspace: # ovs-dpctl dump-flows in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1 in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},2 in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2 in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1 # ping -c1 192.168.0.2 -t 42 IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64 # ping -c1 192.168.0.2 -t 120 IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64 # ping -c1 192.168.0.2 -t 1 # Co-developed-by: Bindiya Kurle Signed-off-by: Bindiya Kurle Signed-off-by: Matteo Croce Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 7 ++++ net/openvswitch/actions.c | 67 ++++++++++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ae2bff14e7e1..9b14519e74d9 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -958,6 +958,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ + OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ @@ -1050,4 +1051,10 @@ struct ovs_zone_limit { __u32 count; }; +enum ovs_dec_ttl_attr { + OVS_DEC_TTL_ATTR_UNSPEC, + OVS_DEC_TTL_ATTR_ACTION, /* Nested struct nlattr */ + __OVS_DEC_TTL_ATTR_MAX +}; + #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7fbfe2adfffa..fc0efd8833c8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -964,6 +964,25 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, bool last) +{ + /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */ + struct nlattr *dec_ttl_arg = nla_data(attr); + int rem = nla_len(attr); + + if (nla_len(dec_ttl_arg)) { + struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + + if (actions) + return clone_execute(dp, skb, key, 0, actions, rem, + last, false); + } + consume_skb(skb); + return 0; +} + /* When 'last' is true, sample() should always consume the 'skb'. * Otherwise, sample() should keep 'skb' intact regardless what * actions are executed within sample(). 
@@ -1180,6 +1199,45 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, nla_len(actions), last, clone_flow_key); } +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *nh; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + + if (nh->hop_limit <= 1) + return -EHOSTUNREACH; + + key->ip.ttl = --nh->hop_limit; + } else { + struct iphdr *nh; + u8 old_ttl; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ip_hdr(skb); + if (nh->ttl <= 1) + return -EHOSTUNREACH; + + old_ttl = nh->ttl--; + csum_replace2(&nh->check, htons(old_ttl << 8), + htons(nh->ttl << 8)); + key->ip.ttl = nh->ttl; + } + return 0; +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1365,6 +1423,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; } + + case OVS_ACTION_ATTR_DEC_TTL: + err = execute_dec_ttl(skb, key); + if (err == -EHOSTUNREACH) { + err = dec_ttl_exception_handler(dp, skb, key, + a, true); + return err; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7da4230627f5..1ccd5e092bca 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -80,6 +80,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_METER: case OVS_ACTION_ATTR_CHECK_PKT_LEN: case OVS_ACTION_ATTR_ADD_MPLS: + case OVS_ACTION_ATTR_DEC_TTL: default: return true; } @@ -2495,6 +2496,39 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return 0; } +static int validate_and_copy_dec_ttl(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log) +{ + int start, err; + u32 nested = true; + + if (!nla_len(attr)) + return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, + NULL, 0, log); + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); + if (start < 0) + return start; + + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, + sizeof(nested), log); + + if (err) + return err; + + err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + vlan_tci, mpls_label_count, log); + if (err) + return err; + + add_nested_action_end(*sfa, start); + return 0; +} + static int validate_and_copy_clone(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, @@ -3007,6 +3041,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CLONE] = (u32)-1, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3267,6 +3302,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case OVS_ACTION_ATTR_DEC_TTL: + err = validate_and_copy_dec_ttl(net, a, key, sfa, + eth_type, vlan_tci, + mpls_label_count, log); + if (err) + return err; + skip_copy = true; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -3438,6 +3482,26 @@ out: return err; } +static int 
dec_ttl_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) +{ + int err = 0, rem = nla_len(attr); + struct nlattr *start; + + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); + + if (!start) + return -EMSGSIZE; + + err = ovs_nla_put_actions(nla_data(attr), rem, skb); + if (err) + nla_nest_cancel(skb, start); + else + nla_nest_end(skb, start); + + return err; +} + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); @@ -3538,6 +3602,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_DEC_TTL: + err = dec_ttl_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; -- cgit v1.2.3 From 3b2582c7affde5f30bcc8321385507e66f4299e1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 16 Feb 2020 18:30:12 +0100 Subject: batman-adv: Avoid RCU list-traversal in spinlock The new CONFIG_PROVE_RCU_LIST requires a condition statement in (h)list_for_each_entry_rcu when the code might be executed in a non RCU non-reader section with the writer lock. Otherwise lockdep might cause a false positive warning like ============================= WARNING: suspicious RCU usage ----------------------------- translation-table.c:940 RCU-list traversed in non-reader section!! batman-adv is (mostly) following the examples from the RCU documentation and is using the normal list-traversal primitives instead of the RCU list-traversal primitives when the writer (spin)lock is held. The remaining users of RCU list-traversal primitives with writer spinlock have to be converted to the same style as the rest of the code. Reported-by: Madhuparna Bhowmik Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 852932838ddc..a9635c882fe0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -862,7 +862,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, u8 *tt_change_ptr; spin_lock_bh(&orig_node->vlan_list_lock); - hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); } @@ -888,7 +888,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); - hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -937,7 +937,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, int change_offset; spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; @@ -967,7 +967,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); - hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + 
hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; -- cgit v1.2.3 From 5f27eb055d5c5814785fb9cf0ae4a4c150a8f334 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 17 Feb 2020 15:43:00 -0600 Subject: batman-adv: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Sven Eckelman Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 3d21dd83f8cc..b85da4b7a77b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -88,7 +88,7 @@ struct batadv_dhcp_packet { __u8 sname[64]; __u8 file[128]; __be32 magic; - __u8 options[0]; + __u8 options[]; }; #define BATADV_DHCP_YIADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->yiaddr) -- cgit v1.2.3 From 7a47281439ba00b11fc098f36695522184ce5a82 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 17 Feb 2020 12:12:09 +0200 Subject: net: sched: lock action when translating it to flow_action infra In order to remove dependency on rtnl lock, take action's tcfa_lock when constructing its representation as flow_action_entry structure. Refactor tcf_sample_get_group() to assume that caller holds tcf_lock and don't take it manually. This callback is only called from flow_action infra representation translator which now calls it with tcf_lock held, so this refactoring is necessary to prevent deadlock. Allocate memory with GFP_ATOMIC flag for ip_tunnel_info copy because tcf_tunnel_info_copy() is only called from flow_action representation infra code with tcf_lock spinlock taken. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_tunnel_key.h | 2 +- net/sched/act_sample.c | 2 -- net/sched/cls_api.c | 17 +++++++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 0689d9bcdf84..2b3df076e5b6 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -69,7 +69,7 @@ tcf_tunnel_info_copy(const struct tc_action *a) if (tun) { size_t tun_size = sizeof(*tun) + tun->options_len; struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, - GFP_KERNEL); + GFP_ATOMIC); return tun_copy; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ce948c1e24dc..5e2df590bb58 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -267,14 +267,12 @@ tcf_sample_get_group(const struct tc_action *a, struct tcf_sample *s = to_sample(a); struct psample_group *group; - spin_lock_bh(&s->tcf_lock); group = rcu_dereference_protected(s->psample_group, lockdep_is_held(&s->tcf_lock)); if (group) { psample_group_take(group); *destructor = tcf_psample_group_put; } - spin_unlock_bh(&s->tcf_lock); return group; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c2cdd0fc2e70..610505117780 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3435,7 +3435,7 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { - const struct tc_action *act; + struct tc_action *act; int i, j, k, err = 0; if (!exts) @@ -3449,6 +3449,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct flow_action_entry *entry; entry = &flow_action->entries[j]; + spin_lock_bh(&act->tcfa_lock); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { @@ -3489,13 +3490,13 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; err = tcf_tunnel_encap_get_tunnel(entry, act); if (err) - goto err_out; + goto err_out_locked; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3509,7 +3510,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } entry->mangle.htype = tcf_pedit_htype(act, k); entry->mangle.mask = tcf_pedit_mask(act, k); @@ -3560,15 +3561,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: - goto err_out; + goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else { err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } + spin_unlock_bh(&act->tcfa_lock); if (!is_tcf_pedit(act)) j++; @@ -3582,6 +3584,9 @@ err_out: tc_cleanup_flow_action(flow_action); return err; +err_out_locked: + spin_unlock_bh(&act->tcfa_lock); + goto err_out; } EXPORT_SYMBOL(tc_setup_flow_action); -- cgit v1.2.3 From b15e7a6e8d31d6abe2d98929d60ad3a0e6ae4de1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 17 Feb 2020 12:12:12 +0200 Subject: net: sched: don't take rtnl lock during flow_action setup Refactor tc_setup_flow_action() function not to use rtnl lock and remove 'rtnl_held' argument that is no longer needed. 
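The resulting pattern, condensed from this and the previous patch (a sketch,
not new code): each action is snapshotted under its own per-action spinlock
instead of under the global rtnl mutex, which is why tunnel info copies must
use GFP_ATOMIC and why callers no longer pass rtnl_held:

    tcf_exts_for_each_action(i, act, exts) {
        spin_lock_bh(&act->tcfa_lock);
        /* translate act into a flow_action_entry; allocations made while
         * the spinlock is held (e.g. tcf_tunnel_info_copy()) use GFP_ATOMIC
         */
        spin_unlock_bh(&act->tcfa_lock);
    }

    /* classifier call sites simply drop the rtnl_held argument: */
    err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);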
Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- net/sched/cls_api.c | 8 +------- net/sched/cls_flower.c | 6 ++---- net/sched/cls_matchall.c | 4 ++-- 4 files changed, 6 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a972244ab193..53946b509b51 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -509,7 +509,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts, bool rtnl_held); + const struct tcf_exts *exts); void tc_cleanup_flow_action(struct flow_action *flow_action); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 610505117780..13c33eaf1ca1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3433,7 +3433,7 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, } int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts, bool rtnl_held) + const struct tcf_exts *exts) { struct tc_action *act; int i, j, k, err = 0; @@ -3441,9 +3441,6 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (!exts) return 0; - if (!rtnl_held) - rtnl_lock(); - j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3577,9 +3574,6 @@ int tc_setup_flow_action(struct flow_action *flow_action, } err_out: - if (!rtnl_held) - rtnl_unlock(); - if (err) tc_cleanup_flow_action(flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7e54d2ab5254..726fc9c5910f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -449,8 +449,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, - rtnl_held); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (skip_sw) { @@ -2000,8 +1999,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, - true); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 610a0b728161..a34b36adb9b7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -302,7 +302,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { -- cgit v1.2.3 From bd706ff8ea2b6e2d3f21f0863b2fc42f860f8ba2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 17 Feb 2020 13:27:58 
+0100 Subject: net: vlan: suppress "failed to kill vid" warnings When a real dev unregisters, vlan_device_event() also unregisters all of its vlan interfaces. For each VID this ends up in __vlan_vid_del(), which attempts to remove the VID from the real dev's VLAN filter. But the unregistering real dev might no longer be able to issue the required IOs, and return an error. Subsequently we raise a noisy warning msg that is not appropriate for this situation: the real dev is being torn down anyway, there shouldn't be any worry about cleanly releasing all of its HW-internal resources. So to avoid scaring innocent users, suppress this warning when the failed deletion happens on an unregistering device. While at it also convert the raw pr_warn() to a more fitting netdev_warn(). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index a313165e7a67..78ec2e1b14d1 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -359,9 +359,8 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, int err; err = vlan_kill_rx_filter_info(dev, proto, vid); - if (err) - pr_warn("failed to kill vid %04x/%d for device %s\n", - proto, vid, dev->name); + if (err && dev->reg_state != NETREG_UNREGISTERING) + netdev_warn(dev, "failed to kill vid %04x/%d\n", proto, vid); list_del(&vid_info->list); kfree(vid_info); -- cgit v1.2.3 From 583cb0b4121f73e9e093dc3a14e419de6dab341e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 17 Feb 2020 14:45:01 +0100 Subject: net: bridge: teach ndo_dflt_bridge_getlink() more brport flags This enables ndo_dflt_bridge_getlink() to report a bridge port's offload settings for multicast and broadcast flooding. CC: Roopa Prabhu CC: Nikolay Aleksandrov Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09c44bf2e1d2..9b4f8a254a15 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4555,7 +4555,11 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) || brport_nla_put_flag(skb, flags, mask, - IFLA_BRPORT_PROXYARP, BR_PROXYARP)) { + IFLA_BRPORT_PROXYARP, BR_PROXYARP) || + brport_nla_put_flag(skb, flags, mask, + IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD) || + brport_nla_put_flag(skb, flags, mask, + IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD)) { nla_nest_cancel(skb, protinfo); goto nla_put_failure; } -- cgit v1.2.3 From 55dd5758175828bd03f4392b4df0d37edd31559d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 17 Feb 2020 16:24:50 +0100 Subject: net/smc: improve smc_lgr_cleanup() smc_lgr_cleanup() is called during termination processing, there is no need to send a DELETE_LINK at that time. A DELETE_LINK should have been sent before the termination is initiated, if needed. And remove the extra call to wake_up(&lnk->wr_reg_wait) because smc_llc_link_inactive() already calls the related helper function smc_wr_wakeup_reg_wait(). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2249de5379ee..8f3c1fced334 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -576,11 +576,8 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) } else { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - wake_up(&lnk->wr_reg_wait); - if (lnk->state != SMC_LNK_INACTIVE) { - smc_link_send_delete(lnk, false); + if (lnk->state != SMC_LNK_INACTIVE) smc_llc_link_inactive(lnk); - } } } -- cgit v1.2.3 From 354ea2baa3936fcbfcb7ddf4ca3b6905389d4b25 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 17 Feb 2020 16:24:51 +0100 Subject: net/smc: use termination worker under send_lock smc_tx_rdma_write() is called under the send_lock and should not call smc_lgr_terminate() directly. Call smc_lgr_terminate_sched() instead which schedules a worker. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 0d42e7716b91..9f1ade86d70e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -284,7 +284,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) - smc_lgr_terminate(lgr, true); + smc_lgr_terminate_sched(lgr); return rc; } -- cgit v1.2.3 From 3739707c4568f05842c8bf770285328067bd6679 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 17 Feb 2020 16:24:52 +0100 Subject: net/smc: do not delete lgr from list twice When 2 callers call smc_lgr_terminate() at the same time for the same lgr, one gets the lgr_lock and deletes the lgr from the list and releases the lock. Then the second caller gets the lock and tries to delete it again. In smc_lgr_terminate() add a check if the link group lgr is already deleted from the link group list and prevent to try to delete it a second time. And add a check if the lgr is marked as freeing, which means that a termination is already pending. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8f3c1fced334..9b92b52952dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -629,7 +629,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr, bool soft) smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); - if (lgr->terminating) { + if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) { spin_unlock_bh(lgr_lock); return; /* lgr already terminating */ } -- cgit v1.2.3 From ba95206042099ad2d3a08c2b484431736c921904 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 17 Feb 2020 16:24:53 +0100 Subject: net/smc: remove unused parameter of smc_lgr_terminate() The soft parameter of smc_lgr_terminate() is not used and obsolete. Remove it. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_clc.c | 2 +- net/smc/smc_core.c | 18 ++++++++---------- net/smc/smc_core.h | 2 +- net/smc/smc_llc.c | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 86cccc24e52e..aee9ccfa99c2 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -349,7 +349,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = 1; - smc_lgr_terminate(smc->conn.lgr, true); + smc_lgr_terminate(smc->conn.lgr); } } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 9b92b52952dd..53b6afbb1d93 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -229,7 +229,7 @@ static void smc_lgr_terminate_work(struct work_struct *work) struct smc_link_group *lgr = container_of(work, struct smc_link_group, terminate_work); - smc_lgr_terminate(lgr, true); + smc_lgr_terminate(lgr); } /* create a new SMC link group */ @@ -581,7 +581,10 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) } } -/* terminate link group */ +/* terminate link group + * @soft: true if link group shutdown can take its time + * false if immediate link group shutdown is required + */ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) { struct smc_connection *conn; @@ -619,11 +622,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) smc_lgr_free(lgr); } -/* unlink and terminate link group - * @soft: true if link group shutdown can take its time - * false if immediate link group shutdown is required - */ -void smc_lgr_terminate(struct smc_link_group *lgr, bool soft) +/* unlink and terminate link group */ +void smc_lgr_terminate(struct smc_link_group *lgr) { spinlock_t *lgr_lock; @@ -633,11 +633,9 @@ void smc_lgr_terminate(struct smc_link_group *lgr, bool soft) spin_unlock_bh(lgr_lock); return; /* lgr already terminating */ } - if (!soft) - lgr->freeing = 1; list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); - __smc_lgr_terminate(lgr, soft); + __smc_lgr_terminate(lgr, true); } /* Called when IB port is terminated */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c472e12951d1..094d43c24345 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -296,7 +296,7 @@ struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; void smc_lgr_forget(struct smc_link_group *lgr); -void smc_lgr_terminate(struct smc_link_group *lgr, bool soft); +void smc_lgr_terminate(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a9f6431dd69a..b134a08c929e 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -614,7 +614,7 @@ static void smc_llc_testlink_work(struct work_struct *work) rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); if (rc <= 0) { - smc_lgr_terminate(smc_get_lgr(link), true); + smc_lgr_terminate(smc_get_lgr(link)); return; } next_interval = link->llc_testlink_time; -- cgit v1.2.3 From 5f78fe968d76902944534db85c4fb244dedc87f4 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 17 Feb 2020 16:24:54 +0100 Subject: net/smc: simplify normal link termination smc_lgr_terminate() and smc_lgr_terminate_sched() both result in soft link termination, smc_lgr_terminate_sched() is scheduling a worker for this task. 
Take out complexity by always using the termination worker and getting rid of smc_lgr_terminate() completely. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 2 +- net/smc/smc_core.c | 9 +++++---- net/smc/smc_core.h | 8 +------- net/smc/smc_llc.c | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index aee9ccfa99c2..3e16b887cfcf 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -349,7 +349,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = 1; - smc_lgr_terminate(smc->conn.lgr); + smc_lgr_terminate_sched(smc->conn.lgr); } } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 53b6afbb1d93..1bbce5531014 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -46,6 +46,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); +static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); /* return head of link group list and its lock for a given link group */ static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, @@ -229,7 +230,7 @@ static void smc_lgr_terminate_work(struct work_struct *work) struct smc_link_group *lgr = container_of(work, struct smc_link_group, terminate_work); - smc_lgr_terminate(lgr); + __smc_lgr_terminate(lgr, true); } /* create a new SMC link group */ @@ -622,8 +623,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) smc_lgr_free(lgr); } -/* unlink and terminate link group */ -void smc_lgr_terminate(struct smc_link_group *lgr) +/* unlink link group and schedule termination */ +void smc_lgr_terminate_sched(struct smc_link_group *lgr) { spinlock_t *lgr_lock; @@ -635,7 +636,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) } list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); - __smc_lgr_terminate(lgr, true); + schedule_work(&lgr->terminate_work); } /* Called when IB port is terminated */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 094d43c24345..5695c7bc639e 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -285,18 +285,12 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } -static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr) -{ - if (!lgr->terminating && !lgr->freeing) - schedule_work(&lgr->terminate_work); -} - struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; void smc_lgr_forget(struct smc_link_group *lgr); -void smc_lgr_terminate(struct smc_link_group *lgr); +void smc_lgr_terminate_sched(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b134a08c929e..0e52aab53d97 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -614,7 +614,7 @@ static void smc_llc_testlink_work(struct work_struct *work) rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); if (rc <= 0) { - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); return; } next_interval = link->llc_testlink_time; -- cgit v1.2.3 From 5613f20c938102093df2c67c7c22d7d0b0d3e3fb Mon Sep 17 00:00:00 2001 From: Ursula Braun 
Date: Mon, 17 Feb 2020 16:24:55 +0100 Subject: net/smc: reduce port_event scheduling IB event handlers schedule the port event worker for further processing of port state changes. This patch reduces the number of schedules to avoid duplicate processing of the same port change. Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 548632621f4b..6756bd5a3fe4 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,6 +257,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, struct ib_event *ibevent) { struct smc_ib_device *smcibdev; + bool schedule = false; u8 port_idx; smcibdev = container_of(handler, struct smc_ib_device, event_handler); @@ -266,22 +267,35 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, /* terminate all ports on device */ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { set_bit(port_idx, &smcibdev->port_event_mask); - set_bit(port_idx, smcibdev->ports_going_away); + if (!test_and_set_bit(port_idx, + smcibdev->ports_going_away)) + schedule = true; } - schedule_work(&smcibdev->port_event_work); + if (schedule) + schedule_work(&smcibdev->port_event_work); break; - case IB_EVENT_PORT_ERR: case IB_EVENT_PORT_ACTIVE: - case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; - if (port_idx < SMC_MAX_PORTS) { - set_bit(port_idx, &smcibdev->port_event_mask); - if (ibevent->event == IB_EVENT_PORT_ERR) - set_bit(port_idx, smcibdev->ports_going_away); - else if (ibevent->event == IB_EVENT_PORT_ACTIVE) - clear_bit(port_idx, smcibdev->ports_going_away); + if (port_idx >= SMC_MAX_PORTS) + break; + set_bit(port_idx, &smcibdev->port_event_mask); + if (test_and_clear_bit(port_idx, smcibdev->ports_going_away)) + schedule_work(&smcibdev->port_event_work); + break; + case IB_EVENT_PORT_ERR: + port_idx = ibevent->element.port_num - 1; + if (port_idx >= SMC_MAX_PORTS) + break; + set_bit(port_idx, &smcibdev->port_event_mask); + if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) schedule_work(&smcibdev->port_event_work); - } + break; + case IB_EVENT_GID_CHANGE: + port_idx = ibevent->element.port_num - 1; + if (port_idx >= SMC_MAX_PORTS) + break; + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); break; default: break; @@ -316,11 +330,11 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: port_idx = ibevent->element.qp->port - 1; - if (port_idx < SMC_MAX_PORTS) { - set_bit(port_idx, &smcibdev->port_event_mask); - set_bit(port_idx, smcibdev->ports_going_away); + if (port_idx >= SMC_MAX_PORTS) + break; + set_bit(port_idx, &smcibdev->port_event_mask); + if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) schedule_work(&smcibdev->port_event_work); - } break; default: break; -- cgit v1.2.3 From 9814428a44d63bf77831afdbb37b5f0aab7394c9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 17 Feb 2020 13:59:41 -0600 Subject: NFC: digital: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/nfc/digital_dep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index 65aaa9d7c813..304b1a9bb18a 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -71,7 +71,7 @@ struct digital_atr_req { u8 bs; u8 br; u8 pp; - u8 gb[0]; + u8 gb[]; } __packed; struct digital_atr_res { @@ -83,7 +83,7 @@ struct digital_atr_res { u8 br; u8 to; u8 pp; - u8 gb[0]; + u8 gb[]; } __packed; struct digital_psl_req { -- cgit v1.2.3 From 45a4296b6e55ef2fbbe5946582822daf8f0f2e71 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 17 Feb 2020 14:01:11 -0600 Subject: bpf, sockmap: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/core/sock_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 085cef5857bb..3a7a96ab088a 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -518,7 +518,7 @@ struct bpf_htab_elem { u32 hash; struct sock *sk; struct hlist_node node; - u8 key[0]; + u8 key[]; }; struct bpf_htab_bucket { -- cgit v1.2.3 From fbfc8502af526578039dc89426224943d199c019 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 17 Feb 2020 14:02:36 -0600 Subject: net: switchdev: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 3a1d428c1336..60630762a748 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -29,7 +29,7 @@ struct switchdev_deferred_item { struct list_head list; struct net_device *dev; switchdev_deferred_func_t *func; - unsigned long data[0]; + unsigned long data[]; }; static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) -- cgit v1.2.3 From 2b73812483e953ebff35d5624149d645df7a3022 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 17 Feb 2020 14:07:19 -0600 Subject: net: netlink: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4e31721e7293..bced11032681 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -71,7 +71,7 @@ struct listeners { struct rcu_head rcu; - unsigned long masks[0]; + unsigned long masks[]; }; /* state bits */ -- cgit v1.2.3 From 05bd80a10411c70b5cc5cf31e0a6d2fc054a7ff0 Mon Sep 17 00:00:00 2001 From: Sathish Narsimman Date: Mon, 17 Feb 2020 14:37:44 +0530 Subject: Bluetooth: Disable Extended Adv if enabled Disabling LEGACY_ADV when EXT_ADV is enabled causes 'command disallowed' during DIRECTED_ADV. This Patch fixes this issue. Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 65fa44cbe514..a582c676e584 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1029,11 +1029,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * anyway have to disable it in order to start directed * advertising. */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - u8 enable = 0x00; - hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); - } + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(&req); /* If requested to connect as slave use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { -- cgit v1.2.3 From a2a8b0b4adeaec3de5213b7825588352a696df75 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 18 Feb 2020 10:33:20 -0800 Subject: Bluetooth: Fix crash when using new BT_PHY option This fixes the invalid check for connected socket which causes the following trace due to sco_pi(sk)->conn being NULL: RIP: 0010:sco_sock_getsockopt+0x2ff/0x800 net/bluetooth/sco.c:966 L2CAP has also been fixed since it has the same problem. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/sco.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 9fb47b2b13c9..305710446e66 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -605,7 +605,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, break; case BT_PHY: - if (sk->sk_state == BT_CONNECTED) { + if (sk->sk_state != BT_CONNECTED) { err = -ENOTCONN; break; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 29ab3e12fb46..c8c3d38cdc7b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -958,7 +958,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; case BT_PHY: - if (sk->sk_state == BT_CONNECTED) { + if (sk->sk_state != BT_CONNECTED) { err = -ENOTCONN; break; } -- cgit v1.2.3 From 573ed90aa5e23b512168400ba6d65e592081944e Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 11 Feb 2020 14:32:42 -0800 Subject: devlink: Force enclosing array on binary fmsg data Add a new API for start/end binary array brackets [] to force array around binary data as required from JSON. With this restriction, re-open API to set binary fmsg data. 
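The nesting API added here is intended for health-reporter dumps that stream a large binary blob in bounded chunks. Below is a minimal sketch of such a caller; the helper name foo_fmsg_put_dump, the "dump data" label and the 256-byte chunk size are illustrative assumptions, while the devlink_fmsg_* calls (added or exported by this patch) and min_t() are real kernel APIs. Note that the nest must be closed even when a put fails, mirroring the reworked devlink_fmsg_binary_pair_put() below.

#include <linux/kernel.h>
#include <net/devlink.h>

/* Illustrative helper: stream a buffer into an fmsg as a binary array.
 * The 256-byte chunk size is arbitrary; the core helper below uses
 * DEVLINK_FMSG_MAX_SIZE for its chunking.
 */
static int foo_fmsg_put_dump(struct devlink_fmsg *fmsg, const void *buf, u32 len)
{
	u32 off, chunk;
	int err, end_err;

	err = devlink_fmsg_binary_pair_nest_start(fmsg, "dump data");
	if (err)
		return err;

	for (off = 0; off < len; off += chunk) {
		chunk = min_t(u32, len - off, 256);
		err = devlink_fmsg_binary_put(fmsg, buf + off, chunk);
		if (err)
			break;	/* fall through so the nest still gets closed */
	}

	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);

	return end_err ? end_err : err;
}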
Signed-off-by: Aya Levin Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 5 +++ net/core/devlink.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 91 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index ce5cea428fdc..149c108be66f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -981,12 +981,17 @@ int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); +int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value); int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, bool value); diff --git a/net/core/devlink.c b/net/core/devlink.c index 549ee56b7a21..216bdd25ce39 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4237,6 +4237,12 @@ struct devlink_fmsg_item { struct devlink_fmsg { struct list_head item_list; + bool putting_binary; /* This flag forces enclosing of binary data + * in an array brackets. It forces using + * of designated API: + * devlink_fmsg_binary_pair_nest_start() + * devlink_fmsg_binary_pair_nest_end() + */ }; static struct devlink_fmsg *devlink_fmsg_alloc(void) @@ -4280,17 +4286,26 @@ static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); } int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); @@ -4301,6 +4316,9 @@ static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) { struct devlink_fmsg_item *item; + if (fmsg->putting_binary) + return -EINVAL; + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) return -EMSGSIZE; @@ -4321,6 +4339,9 @@ int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) { int err; + if (fmsg->putting_binary) + return -EINVAL; + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); if (err) return err; @@ -4335,6 +4356,9 @@ EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); @@ -4344,6 +4368,9 @@ int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, { int err; + if (fmsg->putting_binary) + return -EINVAL; + err = devlink_fmsg_pair_nest_start(fmsg, name); if (err) return err; @@ -4360,6 +4387,9 @@ int 
devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) { int err; + if (fmsg->putting_binary) + return -EINVAL; + err = devlink_fmsg_nest_end(fmsg); if (err) return err; @@ -4372,6 +4402,30 @@ int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); +int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + int err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, name); + if (err) + return err; + + fmsg->putting_binary = true; + return err; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); + +int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) +{ + if (!fmsg->putting_binary) + return -EINVAL; + + fmsg->putting_binary = false; + return devlink_fmsg_arr_pair_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end); + static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, const void *value, u16 value_len, u8 value_nla_type) @@ -4396,40 +4450,59 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put); int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { + if (fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, NLA_NUL_STRING); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); -static int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) { + if (!fmsg->putting_binary) + return -EINVAL; + return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); } +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, bool value) @@ -4540,10 +4613,11 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, const void *value, u32 value_len) { u32 data_size; + int end_err; u32 offset; int err; - err = devlink_fmsg_arr_pair_nest_start(fmsg, name); + err = devlink_fmsg_binary_pair_nest_start(fmsg, name); if (err) return err; @@ -4553,14 +4627,18 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, data_size = DEVLINK_FMSG_MAX_SIZE; err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); if (err) - return err; + break; + /* Exit from loop with a break (instead of + * return) to make sure putting_binary is turned off in + * devlink_fmsg_binary_pair_nest_end + */ } - err = devlink_fmsg_arr_pair_nest_end(fmsg); - if (err) - return err; + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; - return 0; + return err; } 
EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); -- cgit v1.2.3 From f623e5970501449b475a8dc007f0fe24743ab9d3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 11 Feb 2020 14:32:52 -0800 Subject: ethtool: Add support for low latency RS FEC Add support for low latency Reed Solomon FEC as LLRS. The LL-FEC is defined by the 25G/50G ethernet consortium, in the document titled "Low Latency Reed Solomon Forward Error Correction" Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha CC: Andrew Lunn Signed-off-by: Saeed Mahameed Reviewed-by: Andrew Lunn --- drivers/net/phy/phy-core.c | 2 +- include/uapi/linux/ethtool.h | 4 +++- net/ethtool/common.c | 1 + net/ethtool/linkmodes.c | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index a4d2d59fceca..e083e7a76ada 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -8,7 +8,7 @@ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 74, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 75, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4295ebfa2f91..d586ee5e10a1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1330,6 +1330,7 @@ enum ethtool_fec_config_bits { ETHTOOL_FEC_OFF_BIT, ETHTOOL_FEC_RS_BIT, ETHTOOL_FEC_BASER_BIT, + ETHTOOL_FEC_LLRS_BIT, }; #define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) @@ -1337,6 +1338,7 @@ enum ethtool_fec_config_bits { #define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) #define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) #define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) +#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT) /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. @@ -1521,7 +1523,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, - + ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 636ec6d5110e..7b6969af5ae7 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -168,6 +168,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_NAME(400000, DR8, Full), __DEFINE_LINK_MODE_NAME(400000, CR8, Full), + __DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 96f20be64553..f049b97072fe 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -237,6 +237,7 @@ static const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), }; static const struct nla_policy -- cgit v1.2.3 From a4c278d1bee1e2add3f12705401c1c5e6470f291 Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Wed, 12 Feb 2020 17:54:36 +0800 Subject: xfrm: Use kmem_cache_zalloc() instead of kmem_cache_alloc() with flag GFP_ZERO. 
Use kmem_cache_zalloc instead of manually setting kmem_cache_alloc with flag GFP_ZERO since kzalloc sets allocated memory to zero. Change in v2: add indation Signed-off-by: Huang Zijiang Signed-off-by: Yi Wang Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 170d6e7f31d3..8be2d926acc2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -612,7 +612,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; - x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO); + x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC); if (x) { write_pnet(&x->xs_net, net); -- cgit v1.2.3 From dda520c4d4623701dd70cf7b40d29a4eed232d0f Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Wed, 19 Feb 2020 14:49:57 +0200 Subject: ESP: Export esp_output_fill_trailer function The esp fill trailer method is identical for both IPv6 and IPv4. Share the implementation for esp6 and esp to avoid code duplication in addition it could be also used at various drivers code. Signed-off-by: Raed Salem Reviewed-by: Boris Pismenny Reviewed-by: Saeed Mahameed Signed-off-by: Steffen Klassert --- include/net/esp.h | 16 ++++++++++++++++ net/ipv4/esp4.c | 16 ---------------- net/ipv6/esp6.c | 16 ---------------- 3 files changed, 16 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/net/esp.h b/include/net/esp.h index 117652eb6ea3..9c5637d41d95 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -11,6 +11,22 @@ static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) return (struct ip_esp_hdr *)skb_transport_header(skb); } +static inline void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) +{ + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; +} + struct esp_info { struct ip_esp_hdr *esph; __be64 seqno; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 103c7d599a3c..8b07f3a4f2db 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -341,22 +341,6 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } -static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) -{ - /* Fill padding... */ - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = proto; -} - static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, int encap_type, struct esp_info *esp, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a3b403ba8f8f..11143d039f16 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -207,22 +207,6 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } -static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) -{ - /* Fill padding... 
*/ - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = proto; -} - int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; -- cgit v1.2.3 From 9cb8e048e5d93825ec5e8dfb5b8df4987ea25745 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Feb 2020 13:02:53 +0100 Subject: net/ipv4/sysctl: show tcp_{allowed, available}_congestion_control in non-initial netns It is currenty possible to switch the TCP congestion control algorithm in non-initial network namespaces: unshare -U --map-root --net --fork --pid --mount-proc echo "reno" > /proc/sys/net/ipv4/tcp_congestion_control works just fine. But currently non-initial network namespaces have no way of kowing which congestion algorithms are available or allowed other than through trial and error by writing the names of the algorithms into the aforementioned file. Since we already allow changing the congestion algorithm in non-initial network namespaces by exposing the tcp_congestion_control file there is no reason to not also expose the tcp_{allowed,available}_congestion_control files to non-initial network namespaces. After this change a container with a separate network namespace will show: root@f1:~# ls -al /proc/sys/net/ipv4/tcp_* | grep congestion -rw-r--r-- 1 root root 0 Feb 19 11:54 /proc/sys/net/ipv4/tcp_allowed_congestion_control -r--r--r-- 1 root root 0 Feb 19 11:54 /proc/sys/net/ipv4/tcp_available_congestion_control -rw-r--r-- 1 root root 0 Feb 19 11:54 /proc/sys/net/ipv4/tcp_congestion_control Link: https://github.com/lxc/lxc/issues/3267 Reported-by: Haw Loeung Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9684af02e0a5..d9531b4b33f2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -554,18 +554,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ - { - .procname = "tcp_available_congestion_control", - .maxlen = TCP_CA_BUF_MAX, - .mode = 0444, - .proc_handler = proc_tcp_available_congestion_control, - }, - { - .procname = "tcp_allowed_congestion_control", - .maxlen = TCP_CA_BUF_MAX, - .mode = 0644, - .proc_handler = proc_allowed_congestion_control, - }, { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, @@ -885,6 +873,18 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, + { + .procname = "tcp_available_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0444, + .proc_handler = proc_tcp_available_congestion_control, + }, + { + .procname = "tcp_allowed_congestion_control", + .maxlen = TCP_CA_BUF_MAX, + .mode = 0644, + .proc_handler = proc_allowed_congestion_control, + }, { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, -- cgit v1.2.3 From 1c22d3cda8afa3fffa3875cbfa5c82e818a3f780 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 19 Feb 2020 21:31:55 -0800 Subject: Bluetooth: RFCOMM: Use MTU auto tune logic This reuse the L2CAP MTU auto logic to select the MTU used for RFCOMM channels, this should increase the maximum from 1013 to 1021 when 3-DH5 is supported. 
Since it does not set an L2CAP MTU we no longer need a debugfs so that is removed. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 1 - net/bluetooth/rfcomm/core.c | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index da4acefe39c8..8d65d2a0b9b4 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -34,7 +34,6 @@ #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 -#define RFCOMM_MAX_L2CAP_MTU 1013 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index dcecce087b24..2e20af317cea 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -40,7 +40,6 @@ static bool disable_cfc; static bool l2cap_ertm; static int channel_mtu = -1; -static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; static struct task_struct *rfcomm_thread; @@ -749,7 +748,8 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; l2cap_pi(sk)->chan->sec_level = sec_level; if (l2cap_ertm) l2cap_pi(sk)->chan->mode = L2CAP_MODE_ERTM; @@ -2036,7 +2036,8 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; release_sock(sk); /* Start listening on the socket */ @@ -2234,9 +2235,6 @@ MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control"); module_param(channel_mtu, int, 0644); MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); -module_param(l2cap_mtu, uint, 0644); -MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); - module_param(l2cap_ertm, bool, 0644); MODULE_PARM_DESC(l2cap_ertm, "Use L2CAP ERTM mode for connection"); -- cgit v1.2.3 From eed467b517e8c6987e3f227758ff3e67c889e17b Mon Sep 17 00:00:00 2001 From: Howard Chung Date: Thu, 20 Feb 2020 11:17:29 +0800 Subject: Bluetooth: fix passkey uninitialized when used This patch fix the issue: warning:variable 'passkey' is uninitialized when used here Link: https://groups.google.com/forum/#!topic/clang-built-linux/kyRKCjRsGoU Fixes: cee5f20fece3 ("Bluetooth: secure bluetooth stack from bluedump attack") Reported-by: kbuild test robot Suggested-by: Marcel Holtmann Signed-off-by: Howard Chung Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 50e0ac692ec4..1476a91ce935 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2115,7 +2115,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; struct hci_conn *hcon = conn->hcon; - u8 *pkax, *pkbx, *na, *nb; + u8 *pkax, *pkbx, *na, *nb, confirm_hint; u32 passkey; int err; @@ -2179,13 +2179,12 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) */ if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, hcon->role)) { - err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, - hcon->type, - hcon->dst_type, - passkey, 1); - if (err) 
- return SMP_UNSPECIFIED; - set_bit(SMP_FLAG_WAIT_USER, &smp->flags); + /* Set passkey to 0. The value can be any number since + * it'll be ignored anyway. + */ + passkey = 0; + confirm_hint = 1; + goto confirm; } } @@ -2207,8 +2206,11 @@ mackey_and_ltk: if (err) return SMP_UNSPECIFIED; + confirm_hint = 0; + +confirm: err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type, - hcon->dst_type, passkey, 0); + hcon->dst_type, passkey, confirm_hint); if (err) return SMP_UNSPECIFIED; -- cgit v1.2.3 From 9410c9409d3e3a1ee6a02a830f9b6ab678c456d1 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:21 +0200 Subject: net: sched: Introduce ingress classification function TC multi chain configuration can cause offloaded tc chains to miss in hardware after jumping to some chain. In such cases the software should continue from the chain that missed in hardware, as the hardware may have manipulated the packet and updated some counters. Currently a single tcf classification function serves both ingress and egress. However, multi chain miss processing (get tc skb extension on hw miss, set tc skb extension on tc miss) should happen only on ingress. Refactor the code to use ingress classification function, and move setting the tc skb extension from general classification to it, as a prestep for supporting the hw miss scenario. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/pkt_cls.h | 10 +++++++++ net/core/dev.c | 3 ++- net/sched/cls_api.c | 58 ++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 53946b509b51..109cbe3a0d51 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,6 +72,8 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -133,6 +135,14 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { return TC_ACT_UNSPEC; } + +static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} + #endif static inline unsigned long diff --git a/net/core/dev.c b/net/core/dev.c index a6316b336128..107af00e4932 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4860,7 +4860,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { + switch (tcf_classify_ingress(skb, miniq->filter_list, &cl_res, + false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 13c33eaf1ca1..d52b43c56d5d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1559,8 +1559,11 @@ static int tcf_block_setup(struct tcf_block *block, * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. 
*/ -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) +static inline int __tcf_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res, + bool compat_mode, + u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; @@ -1582,21 +1585,11 @@ reclassify: #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; + *last_executed_chain = first_tp->chain->index; goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; - -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - { - struct tc_skb_ext *ext; - - ext = skb_ext_add(skb, TC_SKB_EXT); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - - ext->chain = err & TC_ACT_EXT_VAL_MASK; - } -#endif + *last_executed_chain = err & TC_ACT_EXT_VAL_MASK; goto reset; } #endif @@ -1619,8 +1612,45 @@ reset: goto reclassify; #endif } + +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, res, compat_mode, + &last_executed_chain); +} EXPORT_SYMBOL(tcf_classify); +int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, res, compat_mode, + &last_executed_chain); +#else + u32 last_executed_chain = tp ? tp->chain->index : 0; + struct tc_skb_ext *ext; + int ret; + + ret = __tcf_classify(skb, tp, res, compat_mode, &last_executed_chain); + + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + } + + return ret; +#endif +} +EXPORT_SYMBOL(tcf_classify_ingress); + struct tcf_chain_info { struct tcf_proto __rcu **pprev; struct tcf_proto __rcu *next; -- cgit v1.2.3 From 7d17c544cd304c15317e64ac77617bc774fb3f55 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:22 +0200 Subject: net: sched: Pass ingress block to tcf_classify_ingress On ingress and cls_act qdiscs init, save the block on ingress mini_Qdisc and and pass it on to ingress classification, so it can be used for the looking up a specified chain index. 
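For reference, a condensed sketch of what ingress_init() ends up looking like once this patch is applied (names and ordering taken from the diff below; only the mini_qdisc_pair_block_init() step is new). The point is that the block has to be acquired before it can be published to the mini_Qdisc pair that sch_handle_ingress() reads on the RX fast path:

/* Condensed sketch of ingress_init() after this change; clsact_init()
 * does the same for its ingress block.
 */
static int ingress_init_sketch(struct Qdisc *sch, struct nlattr *opt,
			       struct netlink_ext_ack *extack)
{
	struct ingress_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int err;

	net_inc_ingress_queue();
	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);

	q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	q->block_info.chain_head_change = clsact_chain_head_change;
	q->block_info.chain_head_change_priv = &q->miniqp;

	err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
	if (err)
		return err;

	/* new step: let tcf_classify_ingress() reach chains via miniq->block */
	mini_qdisc_pair_block_init(&q->miniqp, q->block);
	return 0;
}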
Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/net/pkt_cls.h | 7 +++++-- include/net/sch_generic.h | 3 +++ net/core/dev.c | 4 ++-- net/sched/cls_api.c | 4 +++- net/sched/sch_generic.c | 8 ++++++++ net/sched/sch_ingress.c | 11 ++++++++++- 6 files changed, 31 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 109cbe3a0d51..75be5c065dba 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,8 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -137,6 +139,7 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 151208704ed2..bcdf98d21094 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1269,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, */ struct mini_Qdisc { struct tcf_proto *filter_list; + struct tcf_block *block; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; @@ -1295,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block); static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) { diff --git a/net/core/dev.c b/net/core/dev.c index 107af00e4932..4866e6198a29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4860,8 +4860,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify_ingress(skb, miniq->filter_list, &cl_res, - false)) { + switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list, + &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d52b43c56d5d..bbd8b5e7b74b 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1623,7 +1623,9 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } EXPORT_SYMBOL(tcf_classify); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c9595f1048a..2efd5b61acef 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1391,6 +1391,14 @@ void mini_qdisc_pair_swap(struct 
mini_Qdisc_pair *miniqp, } EXPORT_SYMBOL(mini_qdisc_pair_swap); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block) +{ + miniqp->miniq1.block = block; + miniqp->miniq2.block = block; +} +EXPORT_SYMBOL(mini_qdisc_pair_block_init); + void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq) { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index bf56aa519797..84838128b9c5 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; net_inc_ingress_queue(); @@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - return tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + if (err) + return err; + + mini_qdisc_pair_block_init(&q->miniqp, q->block); + + return 0; } static void ingress_destroy(struct Qdisc *sch) @@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS; -- cgit v1.2.3 From 43719298193224c9a76c355de1622bd70242bc08 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:23 +0200 Subject: net: sched: Change the block's chain list to an rcu list To allow lookup of a block's chain under atomic context. Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- net/sched/cls_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bbd8b5e7b74b..a634c85f1e0e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -354,7 +355,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; - list_add_tail(&chain->list, &block->chain_list); + list_add_tail_rcu(&chain->list, &block->chain_list); mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; @@ -394,7 +395,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) ASSERT_BLOCK_LOCKED(block); - list_del(&chain->list); + list_del_rcu(&chain->list); if (!chain->index) block->chain0.chain = NULL; -- cgit v1.2.3 From af699626ee268244423b3c6d43e4daaca40a3ed0 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:24 +0200 Subject: net: sched: Support specifying a starting chain via tc skb ext Set the starting chain from the tc skb ext chain value. Once we read the tc skb ext, delete it, so cloned/redirect packets won't inherit it. In order to lookup a chain by the chain index on the ingress block at ingress classification, provide a lookup function. 
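On the driver side, the expected pattern (a hypothetical foo_rx_restore_tc_chain() helper, not code from this series) is to record the chain reported by the hardware in the TC skb extension before the skb is handed to the stack; tcf_classify_ingress() then resolves that chain on the ingress block via tcf_chain_lookup_rcu(), consumes the extension so clones and redirected copies do not inherit it, and continues classification from there. This depends on CONFIG_NET_TC_SKB_EXT being enabled.

#include <linux/skbuff.h>

/* Hypothetical driver helper: "missed_chain" would come from the
 * hardware completion descriptor when a packet misses after jumping
 * to an offloaded chain; 0 means "start from the first chain", so no
 * extension is needed in that case.
 */
static int foo_rx_restore_tc_chain(struct sk_buff *skb, u32 missed_chain)
{
	struct tc_skb_ext *ext;

	if (!missed_chain)
		return 0;

	ext = skb_ext_add(skb, TC_SKB_EXT);
	if (!ext)
		return -ENOMEM;

	ext->chain = missed_chain;
	return 0;
}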
Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- net/sched/cls_api.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a634c85f1e0e..e604ebec1282 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -454,6 +454,20 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, return NULL; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, + u32 chain_index) +{ + struct tcf_chain *chain; + + list_for_each_entry_rcu(chain, &block->chain_list, list) { + if (chain->index == chain_index) + return chain; + } + return NULL; +} +#endif + static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast); @@ -1562,13 +1576,13 @@ static int tcf_block_setup(struct tcf_block *block, */ static inline int __tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + const struct tcf_proto *orig_tp, struct tcf_result *res, bool compat_mode, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; - const struct tcf_proto *orig_tp = tp; const struct tcf_proto *first_tp; int limit = 0; @@ -1619,7 +1633,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { u32 last_executed_chain = 0; - return __tcf_classify(skb, tp, res, compat_mode, + return __tcf_classify(skb, tp, tp, res, compat_mode, &last_executed_chain); } EXPORT_SYMBOL(tcf_classify); @@ -1632,14 +1646,31 @@ int tcf_classify_ingress(struct sk_buff *skb, #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; - return __tcf_classify(skb, tp, res, compat_mode, + return __tcf_classify(skb, tp, tp, res, compat_mode, &last_executed_chain); #else u32 last_executed_chain = tp ? tp->chain->index : 0; + const struct tcf_proto *orig_tp = tp; struct tc_skb_ext *ext; int ret; - ret = __tcf_classify(skb, tp, res, compat_mode, &last_executed_chain); + ext = skb_ext_find(skb, TC_SKB_EXT); + + if (ext && ext->chain) { + struct tcf_chain *fchain; + + fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain); + if (!fchain) + return TC_ACT_SHOT; + + /* Consume, so cloned/redirect skbs won't inherit ext */ + skb_ext_del(skb, TC_SKB_EXT); + + tp = rcu_dereference_bh(fchain->filter_chain); + } + + ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, + &last_executed_chain); /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { -- cgit v1.2.3 From 94e512de3e4f039458eb2e9c066b7c67cfeea3de Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 20 Feb 2020 14:49:02 +0800 Subject: net: neigh: remove unused NEIGH_SYSCTL_MS_JIFFIES_ENTRY this macro is never used, so remove it Signed-off-by: Li RongQing Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 789a73aa7bd8..5bf8d22a47ec 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3553,9 +3553,6 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) -#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ - NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) - #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) -- cgit v1.2.3 From 807ea87032c491260bbf5fe6f5c62ec9187d2e8f Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 20 Feb 2020 14:50:19 +0800 Subject: net: remove unused macro from fib_trie.c TNODE_KMALLOC_MAX and VERSION are not used, so remove them Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff0c24371e33..f4c2ac445b3f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -35,9 +35,6 @@ * Paul E. McKenney * Patrick McHardy */ - -#define VERSION "0.409" - #include #include #include @@ -304,8 +301,6 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -#define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) #define TNODE_VMALLOC_MAX \ ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) -- cgit v1.2.3 From 458de8a97f107e1b6120d608b93ae9e3de019a2e Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Thu, 20 Feb 2020 09:41:55 +0200 Subject: net: page_pool: API cleanup and comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions starting with __ usually indicate those which are exported, but should not be called directly. Update some of those declared in the API and make it more readable. page_pool_unmap_page() and page_pool_release_page() were doing exactly the same thing calling __page_pool_clean_page(). Let's rename __page_pool_clean_page() to page_pool_release_page() and export it in order to show up on perf logs and get rid of page_pool_unmap_page(). Finally rename __page_pool_put_page() to page_pool_put_page() since we can now directly call it from drivers and rename the existing page_pool_put_page() to page_pool_put_full_page() since they do the same thing but the latter is trying to sync the full DMA area. This patch also updates netsec, mvneta and stmmac drivers which use those functions. Suggested-by: Jonathan Lemon Acked-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvneta.c | 19 +++--- drivers/net/ethernet/socionext/netsec.c | 23 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +- include/net/page_pool.h | 36 +++++------- net/core/page_pool.c | 70 ++++++++++++----------- net/core/xdp.c | 2 +- 6 files changed, 74 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 8e1feb678cea..1c391f63a26f 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1956,7 +1956,7 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, if (!data || !(rx_desc->buf_phys_addr)) continue; - page_pool_put_page(rxq->page_pool, data, false); + page_pool_put_full_page(rxq->page_pool, data, false); } if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) xdp_rxq_info_unreg(&rxq->xdp_rxq); @@ -2154,9 +2154,9 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, err = xdp_do_redirect(pp->dev, xdp, prog); if (err) { ret = MVNETA_XDP_DROPPED; - __page_pool_put_page(rxq->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(rxq->page_pool, + virt_to_head_page(xdp->data), len, + true); } else { ret = MVNETA_XDP_REDIR; stats->xdp_redirect++; @@ -2166,9 +2166,9 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, case XDP_TX: ret = mvneta_xdp_xmit_back(pp, xdp); if (ret != MVNETA_XDP_TX) - __page_pool_put_page(rxq->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(rxq->page_pool, + virt_to_head_page(xdp->data), len, + true); break; default: bpf_warn_invalid_xdp_action(act); @@ -2177,9 +2177,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, trace_xdp_exception(pp->dev, prog, act); /* fall through */ case XDP_DROP: - __page_pool_put_page(rxq->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(rxq->page_pool, + virt_to_head_page(xdp->data), len, true); ret = MVNETA_XDP_DROPPED; stats->xdp_drop++; break; diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 6266926fe054..58b9b7ce7195 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -896,9 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, case XDP_TX: ret = netsec_xdp_xmit_back(priv, xdp); if (ret != NETSEC_XDP_TX) - __page_pool_put_page(dring->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), len, + true); break; case XDP_REDIRECT: err = xdp_do_redirect(priv->ndev, xdp, prog); @@ -906,9 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, ret = NETSEC_XDP_REDIR; } else { ret = NETSEC_XDP_CONSUMED; - __page_pool_put_page(dring->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), len, + true); } break; default: @@ -919,9 +919,8 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, /* fall through -- handle aborts by dropping packet */ case XDP_DROP: ret = NETSEC_XDP_CONSUMED; - __page_pool_put_page(dring->page_pool, - virt_to_head_page(xdp->data), - len, true); + page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), len, true); break; } @@ -1020,8 +1019,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) * cache state. 
Since we paid the allocation cost if * building an skb fails try to put the page into cache */ - __page_pool_put_page(dring->page_pool, page, - pkt_len, true); + page_pool_put_page(dring->page_pool, page, pkt_len, + true); netif_err(priv, drv, priv->ndev, "rx failed to build skb\n"); break; @@ -1195,7 +1194,7 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) if (id == NETSEC_RING_RX) { struct page *page = virt_to_page(desc->addr); - page_pool_put_page(dring->page_pool, page, false); + page_pool_put_full_page(dring->page_pool, page, false); } else if (id == NETSEC_RING_TX) { dma_unmap_single(priv->dev, desc->dma_addr, desc->len, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5836b21edd7e..37920b4da091 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1251,11 +1251,11 @@ static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i) struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; if (buf->page) - page_pool_put_page(rx_q->page_pool, buf->page, false); + page_pool_put_full_page(rx_q->page_pool, buf->page, false); buf->page = NULL; if (buf->sec_page) - page_pool_put_page(rx_q->page_pool, buf->sec_page, false); + page_pool_put_full_page(rx_q->page_pool, buf->sec_page, false); buf->sec_page = NULL; } diff --git a/include/net/page_pool.h b/include/net/page_pool.h index cfbed00ba7ee..81d7773f96cd 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -151,6 +151,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +void page_pool_release_page(struct page_pool *pool, struct page *page); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -160,41 +161,32 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)) { } +static inline void page_pool_release_page(struct page_pool *pool, + struct page *page) +{ +} #endif -/* Never call this directly, use helpers below */ -void __page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct); +void page_pool_put_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct); -static inline void page_pool_put_page(struct page_pool *pool, - struct page *page, bool allow_direct) +/* Same as above but will try to sync the entire area pool->max_len */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) { /* When page_pool isn't compiled-in, net/core/xdp.c doesn't * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - __page_pool_put_page(pool, page, -1, allow_direct); + page_pool_put_page(pool, page, -1, allow_direct); #endif } -/* Very limited use-cases allow recycle direct */ + +/* Same as above but the caller must guarantee safe context. e.g NAPI */ static inline void page_pool_recycle_direct(struct page_pool *pool, struct page *page) { - __page_pool_put_page(pool, page, -1, true); -} - -/* Disconnects a page (from a page_pool). API users can have a need - * to disconnect a page (from a page_pool), to allow it to be used as - * a regular page (that will eventually be returned to the normal - * page-allocator via put_page). 
- */ -void page_pool_unmap_page(struct page_pool *pool, struct page *page); -static inline void page_pool_release_page(struct page_pool *pool, - struct page *page) -{ -#ifdef CONFIG_PAGE_POOL - page_pool_unmap_page(pool, page); -#endif + page_pool_put_full_page(pool, page, true); } static inline dma_addr_t page_pool_get_dma_addr(struct page *page) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 10d2b255df5e..626db912fce4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -96,7 +96,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void __page_pool_return_page(struct page_pool *pool, struct page *page); +static void page_pool_return_page(struct page_pool *pool, struct page *page); noinline static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) @@ -136,7 +136,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - __page_pool_return_page(pool, page); + page_pool_return_page(pool, page); page = NULL; break; } @@ -274,18 +274,25 @@ static s32 page_pool_inflight(struct page_pool *pool) return inflight; } -/* Cleanup page_pool state from page */ -static void __page_pool_clean_page(struct page_pool *pool, - struct page *page) +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). + */ +void page_pool_release_page(struct page_pool *pool, struct page *page) { dma_addr_t dma; int count; if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + /* Always account for inflight pages, even if we didn't + * map them + */ goto skip_dma_unmap; dma = page->dma_addr; - /* DMA unmap */ + + /* When page is unmapped, it cannot be returned our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); @@ -297,21 +304,12 @@ skip_dma_unmap: count = atomic_inc_return(&pool->pages_state_release_cnt); trace_page_pool_state_release(pool, page, count); } - -/* unmap the page and clean our state */ -void page_pool_unmap_page(struct page_pool *pool, struct page *page) -{ - /* When page is unmapped, this implies page will not be - * returned to page_pool. - */ - __page_pool_clean_page(pool, page); -} -EXPORT_SYMBOL(page_pool_unmap_page); +EXPORT_SYMBOL(page_pool_release_page); /* Return a page to the page allocator, cleaning up our state */ -static void __page_pool_return_page(struct page_pool *pool, struct page *page) +static void page_pool_return_page(struct page_pool *pool, struct page *page) { - __page_pool_clean_page(pool, page); + page_pool_release_page(pool, page); put_page(page); /* An optimization would be to call __free_pages(page, pool->p.order) @@ -320,8 +318,7 @@ static void __page_pool_return_page(struct page_pool *pool, struct page *page) */ } -static bool __page_pool_recycle_into_ring(struct page_pool *pool, - struct page *page) +static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) { int ret; /* BH protection not needed if current is serving softirq */ @@ -338,7 +335,7 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool, * * Caller must provide appropriate safe context. 
*/ -static bool __page_pool_recycle_direct(struct page *page, +static bool page_pool_recycle_in_cache(struct page *page, struct page_pool *pool) { if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) @@ -357,8 +354,14 @@ static bool pool_page_reusable(struct page_pool *pool, struct page *page) return !page_is_pfmemalloc(page); } -void __page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +/* If the page refcnt == 1, this will try to recycle the page. + * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for + * the configured size min(dma_sync_size, pool->max_len). + * If the page refcnt != 1, then the page will be returned to memory + * subsystem. + */ +void page_pool_put_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) { /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the @@ -375,12 +378,12 @@ void __page_pool_put_page(struct page_pool *pool, struct page *page, dma_sync_size); if (allow_direct && in_serving_softirq()) - if (__page_pool_recycle_direct(page, pool)) + if (page_pool_recycle_in_cache(page, pool)) return; - if (!__page_pool_recycle_into_ring(pool, page)) { + if (!page_pool_recycle_in_ring(pool, page)) { /* Cache full, fallback to free pages */ - __page_pool_return_page(pool, page); + page_pool_return_page(pool, page); } return; } @@ -397,12 +400,13 @@ void __page_pool_put_page(struct page_pool *pool, struct page *page, * doing refcnt based recycle tricks, meaning another process * will be invoking put_page. */ - __page_pool_clean_page(pool, page); + /* Do not replace this with page_pool_return_page() */ + page_pool_release_page(pool, page); put_page(page); } -EXPORT_SYMBOL(__page_pool_put_page); +EXPORT_SYMBOL(page_pool_put_page); -static void __page_pool_empty_ring(struct page_pool *pool) +static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; @@ -413,7 +417,7 @@ static void __page_pool_empty_ring(struct page_pool *pool) pr_crit("%s() page_pool refcnt %d violation\n", __func__, page_ref_count(page)); - __page_pool_return_page(pool, page); + page_pool_return_page(pool, page); } } @@ -443,7 +447,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) */ while (pool->alloc.count) { page = pool->alloc.cache[--pool->alloc.count]; - __page_pool_return_page(pool, page); + page_pool_return_page(pool, page); } } @@ -455,7 +459,7 @@ static void page_pool_scrub(struct page_pool *pool) /* No more consumers should exist, but producers could still * be in-flight. 
*/ - __page_pool_empty_ring(pool); + page_pool_empty_ring(pool); } static int page_pool_release(struct page_pool *pool) @@ -529,7 +533,7 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { page = pool->alloc.cache[--pool->alloc.count]; - __page_pool_return_page(pool, page); + page_pool_return_page(pool, page); } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/xdp.c b/net/core/xdp.c index 8310714c47fd..4c7ea85486af 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -372,7 +372,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); page = virt_to_head_page(data); napi_direct &= !xdp_return_frame_no_direct(); - page_pool_put_page(xa->page_pool, page, napi_direct); + page_pool_put_full_page(xa->page_pool, page, napi_direct); rcu_read_unlock(); break; case MEM_TYPE_PAGE_SHARED: -- cgit v1.2.3 From 2e92a2d0e450740ebe7e7a816162327ad1fde94b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 20 Feb 2020 09:00:07 +0100 Subject: net: use netif_is_bridge_port() to check for IFF_BRIDGE_PORT Trivial cleanup, so that all bridge port-specific code can be found in one go. CC: Johannes Berg CC: Roopa Prabhu CC: Nikolay Aleksandrov Signed-off-by: Julian Wiedmann Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- net/core/rtnetlink.c | 12 ++++++------ net/wireless/nl80211.c | 2 +- net/wireless/util.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 48d5ec770b94..c3c524f77fcd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1265,7 +1265,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) skb->dev = bond->dev; if (BOND_MODE(bond) == BOND_MODE_ALB && - bond->dev->priv_flags & IFF_BRIDGE_PORT && + netif_is_bridge_port(bond->dev) && skb->pkt_type == PACKET_HOST) { if (unlikely(skb_cow_head(skb, diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index d1444ba36e10..4fe6aedca22f 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -5694,7 +5694,7 @@ static void dev_set_promiscuous(struct net_device *dev, struct dev_priv *priv, * from the bridge. 
*/ if ((hw->features & STP_SUPPORT) && !promiscuous && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct ksz_switch *sw = hw->ksz_switch; int port = priv->port.first_port; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9b4f8a254a15..6e35742969e6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3911,7 +3911,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; @@ -4022,7 +4022,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && - (dev->priv_flags & IFF_BRIDGE_PORT)) { + netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; @@ -4248,13 +4248,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (netif_is_bridge_port(dev)) { br_dev = netdev_master_upper_dev_get(dev); cops = br_dev->netdev_ops; } } else { if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) + !netif_is_bridge_port(dev)) continue; if (br_dev != netdev_master_upper_dev_get(dev) && @@ -4266,7 +4266,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (netif_is_bridge_port(dev)) { if (cops && cops->ndo_fdb_dump) { err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, @@ -4416,7 +4416,7 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (dev) { if (!ndm_flags || (ndm_flags & NTF_MASTER)) { - if (!(dev->priv_flags & IFF_BRIDGE_PORT)) { + if (!netif_is_bridge_port(dev)) { NL_SET_ERR_MSG(extack, "Device is not a bridge port"); return -EINVAL; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f0112dabe21e..8c2a246099ef 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3531,7 +3531,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { if (!use_4addr) { - if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT)) + if (netdev && netif_is_bridge_port(netdev)) return -EBUSY; return 0; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 8481e9ac33da..80fb47c43bdd 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -934,7 +934,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; /* if it's part of a bridge, reject changing type to station/ibss */ - if ((dev->priv_flags & IFF_BRIDGE_PORT) && + if (netif_is_bridge_port(dev) && (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION || ntype == NL80211_IFTYPE_P2P_CLIENT)) -- cgit v1.2.3 From 3ee9306b353bdd0c0cd7cd3a549233e1ad44b608 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Wed, 19 Feb 2020 14:41:04 +0530 Subject: cfg80211: Pass lockdep expression to RCU lists rdev->sched_scan_req_list maybe traversed using list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of rtnl_mutex. 
Hence, add corresponding lockdep expression to silence false-positive warnings, and harden RCU lists. Signed-off-by: Amol Grover Link: https://lore.kernel.org/r/20200219091102.10709-1-frextrite@gmail.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index aef240fdf8df..7f1af8f347b1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -556,9 +556,8 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) { struct cfg80211_sched_scan_request *pos; - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); - - list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) { + list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list, + lockdep_rtnl_is_held()) { if (pos->reqid == reqid) return pos; } -- cgit v1.2.3 From ca98c47d54d76d2c3f59f8f66530362562914b6b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2020 10:45:45 +0100 Subject: mac80211: check vif pointer before airtime calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of monitor mode injection, vif may be NULL, don't crash on that in ieee80211_calc_expected_tx_airtime(). Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200221104544.dddb7a3568fd.I0ede2733a3c76e95daeab07538449ea847e7b78d@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 38f20a370f2a..8dd93072f6e6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation * * Transmit and frame generation functions. */ @@ -3682,7 +3682,8 @@ begin: encap_out: IEEE80211_SKB_CB(skb)->control.vif = vif; - if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { + if (vif && + wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { u32 airtime; airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, -- cgit v1.2.3 From 7bb106eb689101dc56cc205d5fb4ecf5c0554348 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 14 Feb 2020 23:23:38 +0100 Subject: cfg80211: remove support for adjacent channel compensation The only driver that used that was iwlwifi and it removed support for this. Remove the feature here as well. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200214232336.a530de38e511.I393bc395f6037c8cca6421ed550e3072dc248aed@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ------- net/mac80211/scan.c | 3 +-- net/wireless/scan.c | 6 ++---- 3 files changed, 3 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7709b92f5b6b..7ea23caa7301 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4577,12 +4577,6 @@ struct wiphy_iftype_akm_suites { * and probe responses. This value should be set if the driver * wishes to limit the number of csa counters. Default (0) means * infinite. 
- * @max_adj_channel_rssi_comp: max offset of between the channel on which the - * frame was sent and the channel on which the frame was heard for which - * the reported rssi is still valid. If a driver is able to compensate the - * low rssi when a frame is heard on different channel, then it should set - * this variable to the maximal offset for which it can compensate. - * This value should be set in MHz. * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. @@ -4734,7 +4728,6 @@ struct wiphy { u16 max_ap_assoc_sta; u8 max_num_csa_counters; - u8 max_adj_channel_rssi_comp; u32 bss_select_support; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4d31d9688dc2..fdac8192a519 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -201,8 +201,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, mgmt->bssid, cbss->bssid); /* In case the signal is invalid update the status */ - signal_valid = abs(channel->center_freq - cbss->channel->center_freq) - <= local->hw.wiphy->max_adj_channel_rssi_comp; + signal_valid = channel == cbss->channel; if (!signal_valid) rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7f1af8f347b1..dd41e41f9d26 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1433,8 +1433,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, } rcu_assign_pointer(tmp.pub.ies, ies); - signal_valid = abs(data->chan->center_freq - channel->center_freq) <= - wiphy->max_adj_channel_rssi_comp; + signal_valid = data->chan == channel; res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts); if (!res) return NULL; @@ -1851,8 +1850,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); ether_addr_copy(tmp.parent_bssid, data->parent_bssid); - signal_valid = abs(data->chan->center_freq - channel->center_freq) <= - wiphy->max_adj_channel_rssi_comp; + signal_valid = data->chan == channel; res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, jiffies); if (!res) -- cgit v1.2.3 From 0c2204a4ad710d95d348ea006f14ba926e842ffd Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 20:43:26 +0530 Subject: net: qrtr: Migrate nameservice to kernel from userspace The QRTR nameservice has been maintained in userspace for some time. This commit migrates it to Linux kernel. This change is required in order to eliminate the need of starting a userspace daemon for making the WiFi functional for ath11k based devices. Since the QRTR NS is not usually packed in most of the distros, users need to clone, build and install it to get the WiFi working. It will become a hassle when the user doesn't have any other source of network connectivity. Signed-off-by: Manivannan Sadhasivam Signed-off-by: David S. 
Miller --- net/qrtr/Makefile | 2 +- net/qrtr/ns.c | 751 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/qrtr/qrtr.c | 48 +--- net/qrtr/qrtr.h | 4 + 4 files changed, 766 insertions(+), 39 deletions(-) create mode 100644 net/qrtr/ns.c (limited to 'net') diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile index 1c6d6c120fb7..32d4e923925d 100644 --- a/net/qrtr/Makefile +++ b/net/qrtr/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_QRTR) := qrtr.o +obj-$(CONFIG_QRTR) := qrtr.o ns.o obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o qrtr-smd-y := smd.o diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c new file mode 100644 index 000000000000..67a4e59cdf4d --- /dev/null +++ b/net/qrtr/ns.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (c) 2015, Sony Mobile Communications Inc. + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2020, Linaro Ltd. + */ + +#include +#include +#include +#include + +#include "qrtr.h" + +static RADIX_TREE(nodes, GFP_KERNEL); + +static struct { + struct socket *sock; + struct sockaddr_qrtr bcast_sq; + struct list_head lookups; + struct workqueue_struct *workqueue; + struct work_struct work; + int local_node; +} qrtr_ns; + +static const char * const qrtr_ctrl_pkt_strings[] = { + [QRTR_TYPE_HELLO] = "hello", + [QRTR_TYPE_BYE] = "bye", + [QRTR_TYPE_NEW_SERVER] = "new-server", + [QRTR_TYPE_DEL_SERVER] = "del-server", + [QRTR_TYPE_DEL_CLIENT] = "del-client", + [QRTR_TYPE_RESUME_TX] = "resume-tx", + [QRTR_TYPE_EXIT] = "exit", + [QRTR_TYPE_PING] = "ping", + [QRTR_TYPE_NEW_LOOKUP] = "new-lookup", + [QRTR_TYPE_DEL_LOOKUP] = "del-lookup", +}; + +struct qrtr_server_filter { + unsigned int service; + unsigned int instance; + unsigned int ifilter; +}; + +struct qrtr_lookup { + unsigned int service; + unsigned int instance; + + struct sockaddr_qrtr sq; + struct list_head li; +}; + +struct qrtr_server { + unsigned int service; + unsigned int instance; + + unsigned int node; + unsigned int port; + + struct list_head qli; +}; + +struct qrtr_node { + unsigned int id; + struct radix_tree_root servers; +}; + +static struct qrtr_node *node_get(unsigned int node_id) +{ + struct qrtr_node *node; + + node = radix_tree_lookup(&nodes, node_id); + if (node) + return node; + + /* If node didn't exist, allocate and insert it to the tree */ + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->id = node_id; + + radix_tree_insert(&nodes, node_id, node); + + return node; +} + +static int server_match(const struct qrtr_server *srv, + const struct qrtr_server_filter *f) +{ + unsigned int ifilter = f->ifilter; + + if (f->service != 0 && srv->service != f->service) + return 0; + if (!ifilter && f->instance) + ifilter = ~0; + + return (srv->instance & ifilter) == f->instance; +} + +static int service_announce_new(struct sockaddr_qrtr *dest, + struct qrtr_server *srv) +{ + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct kvec iv; + + trace_printk("advertising new server [%d:%x]@[%d:%d]\n", + srv->service, srv->instance, srv->node, srv->port); + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_NEW_SERVER); + pkt.server.service = cpu_to_le32(srv->service); + pkt.server.instance = cpu_to_le32(srv->instance); + pkt.server.node = cpu_to_le32(srv->node); + pkt.server.port = cpu_to_le32(srv->port); + + msg.msg_name = (struct sockaddr *)dest; + msg.msg_namelen = sizeof(*dest); + + return 
kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); +} + +static int service_announce_del(struct sockaddr_qrtr *dest, + struct qrtr_server *srv) +{ + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct kvec iv; + int ret; + + trace_printk("advertising removal of server [%d:%x]@[%d:%d]\n", + srv->service, srv->instance, srv->node, srv->port); + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_SERVER); + pkt.server.service = cpu_to_le32(srv->service); + pkt.server.instance = cpu_to_le32(srv->instance); + pkt.server.node = cpu_to_le32(srv->node); + pkt.server.port = cpu_to_le32(srv->port); + + msg.msg_name = (struct sockaddr *)dest; + msg.msg_namelen = sizeof(*dest); + + ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) + pr_err("failed to announce del serivce\n"); + + return ret; +} + +static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv, + bool new) +{ + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct kvec iv; + int ret; + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = new ? cpu_to_le32(QRTR_TYPE_NEW_SERVER) : + cpu_to_le32(QRTR_TYPE_DEL_SERVER); + if (srv) { + pkt.server.service = cpu_to_le32(srv->service); + pkt.server.instance = cpu_to_le32(srv->instance); + pkt.server.node = cpu_to_le32(srv->node); + pkt.server.port = cpu_to_le32(srv->port); + } + + msg.msg_name = (struct sockaddr *)to; + msg.msg_namelen = sizeof(*to); + + ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) + pr_err("failed to send lookup notification\n"); +} + +static int announce_servers(struct sockaddr_qrtr *sq) +{ + struct radix_tree_iter iter; + struct qrtr_server *srv; + struct qrtr_node *node; + void __rcu **slot; + int ret; + + node = node_get(qrtr_ns.local_node); + if (!node) + return 0; + + /* Announce the list of servers registered in this node */ + radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { + srv = radix_tree_deref_slot(slot); + + ret = service_announce_new(sq, srv); + if (ret < 0) { + pr_err("failed to announce new service\n"); + return ret; + } + } + + return 0; +} + +static struct qrtr_server *server_add(unsigned int service, + unsigned int instance, + unsigned int node_id, + unsigned int port) +{ + struct qrtr_server *srv; + struct qrtr_server *old; + struct qrtr_node *node; + + if (!service || !port) + return NULL; + + srv = kzalloc(sizeof(*srv), GFP_KERNEL); + if (!srv) + return ERR_PTR(-ENOMEM); + + srv->service = service; + srv->instance = instance; + srv->node = node_id; + srv->port = port; + + node = node_get(node_id); + if (!node) + goto err; + + /* Delete the old server on the same port */ + old = radix_tree_lookup(&node->servers, port); + if (old) { + radix_tree_delete(&node->servers, port); + kfree(old); + } + + radix_tree_insert(&node->servers, port, srv); + + trace_printk("add server [%d:%x]@[%d:%d]\n", srv->service, + srv->instance, srv->node, srv->port); + + return srv; + +err: + kfree(srv); + return NULL; +} + +static int server_del(struct qrtr_node *node, unsigned int port) +{ + struct qrtr_lookup *lookup; + struct qrtr_server *srv; + struct list_head *li; + + srv = radix_tree_lookup(&node->servers, port); + if (!srv) + return -ENOENT; + + radix_tree_delete(&node->servers, port); + + /* Broadcast the removal of local servers */ + if (srv->node == qrtr_ns.local_node) + service_announce_del(&qrtr_ns.bcast_sq, srv); + + /* Announce the service's 
disappearance to observers */ + list_for_each(li, &qrtr_ns.lookups) { + lookup = container_of(li, struct qrtr_lookup, li); + if (lookup->service && lookup->service != srv->service) + continue; + if (lookup->instance && lookup->instance != srv->instance) + continue; + + lookup_notify(&lookup->sq, srv, false); + } + + kfree(srv); + + return 0; +} + +/* Announce the list of servers registered on the local node */ +static int ctrl_cmd_hello(struct sockaddr_qrtr *sq) +{ + return announce_servers(sq); +} + +static int ctrl_cmd_bye(struct sockaddr_qrtr *from) +{ + struct qrtr_node *local_node; + struct radix_tree_iter iter; + struct qrtr_ctrl_pkt pkt; + struct qrtr_server *srv; + struct sockaddr_qrtr sq; + struct msghdr msg = { }; + struct qrtr_node *node; + void __rcu **slot; + struct kvec iv; + int ret; + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + node = node_get(from->sq_node); + if (!node) + return 0; + + /* Advertise removal of this client to all servers of remote node */ + radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { + srv = radix_tree_deref_slot(slot); + server_del(node, srv->port); + } + + /* Advertise the removal of this client to all local servers */ + local_node = node_get(qrtr_ns.local_node); + if (!local_node) + return 0; + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); + pkt.client.node = cpu_to_le32(from->sq_node); + + radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { + srv = radix_tree_deref_slot(slot); + + sq.sq_family = AF_QIPCRTR; + sq.sq_node = srv->node; + sq.sq_port = srv->port; + + msg.msg_name = (struct sockaddr *)&sq; + msg.msg_namelen = sizeof(sq); + + ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) { + pr_err("failed to send bye cmd\n"); + return ret; + } + } + + return 0; +} + +static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, + unsigned int node_id, unsigned int port) +{ + struct qrtr_node *local_node; + struct radix_tree_iter iter; + struct qrtr_lookup *lookup; + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct qrtr_server *srv; + struct sockaddr_qrtr sq; + struct qrtr_node *node; + struct list_head *tmp; + struct list_head *li; + void __rcu **slot; + struct kvec iv; + int ret; + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + /* Don't accept spoofed messages */ + if (from->sq_node != node_id) + return -EINVAL; + + /* Local DEL_CLIENT messages comes from the port being closed */ + if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) + return -EINVAL; + + /* Remove any lookups by this client */ + list_for_each_safe(li, tmp, &qrtr_ns.lookups) { + lookup = container_of(li, struct qrtr_lookup, li); + if (lookup->sq.sq_node != node_id) + continue; + if (lookup->sq.sq_port != port) + continue; + + list_del(&lookup->li); + kfree(lookup); + } + + /* Remove the server belonging to this port */ + node = node_get(node_id); + if (node) + server_del(node, port); + + /* Advertise the removal of this client to all local servers */ + local_node = node_get(qrtr_ns.local_node); + if (!local_node) + return 0; + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); + pkt.client.node = cpu_to_le32(node_id); + pkt.client.port = cpu_to_le32(port); + + radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { + srv = radix_tree_deref_slot(slot); + + sq.sq_family = AF_QIPCRTR; + sq.sq_node = srv->node; + sq.sq_port = srv->port; + + msg.msg_name = (struct sockaddr *)&sq; + msg.msg_namelen = sizeof(sq); + + ret = 
kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) { + pr_err("failed to send del client cmd\n"); + return ret; + } + } + + return 0; +} + +static int ctrl_cmd_new_server(struct sockaddr_qrtr *from, + unsigned int service, unsigned int instance, + unsigned int node_id, unsigned int port) +{ + struct qrtr_lookup *lookup; + struct qrtr_server *srv; + struct list_head *li; + int ret = 0; + + /* Ignore specified node and port for local servers */ + if (from->sq_node == qrtr_ns.local_node) { + node_id = from->sq_node; + port = from->sq_port; + } + + /* Don't accept spoofed messages */ + if (from->sq_node != node_id) + return -EINVAL; + + srv = server_add(service, instance, node_id, port); + if (!srv) + return -EINVAL; + + if (srv->node == qrtr_ns.local_node) { + ret = service_announce_new(&qrtr_ns.bcast_sq, srv); + if (ret < 0) { + pr_err("failed to announce new service\n"); + return ret; + } + } + + /* Notify any potential lookups about the new server */ + list_for_each(li, &qrtr_ns.lookups) { + lookup = container_of(li, struct qrtr_lookup, li); + if (lookup->service && lookup->service != service) + continue; + if (lookup->instance && lookup->instance != instance) + continue; + + lookup_notify(&lookup->sq, srv, true); + } + + return ret; +} + +static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, + unsigned int service, unsigned int instance, + unsigned int node_id, unsigned int port) +{ + struct qrtr_node *node; + + /* Ignore specified node and port for local servers*/ + if (from->sq_node == qrtr_ns.local_node) { + node_id = from->sq_node; + port = from->sq_port; + } + + /* Don't accept spoofed messages */ + if (from->sq_node != node_id) + return -EINVAL; + + /* Local servers may only unregister themselves */ + if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) + return -EINVAL; + + node = node_get(node_id); + if (!node) + return -ENOENT; + + return server_del(node, port); +} + +static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, + unsigned int service, unsigned int instance) +{ + struct radix_tree_iter node_iter; + struct qrtr_server_filter filter; + struct radix_tree_iter srv_iter; + struct qrtr_lookup *lookup; + struct qrtr_node *node; + void __rcu **node_slot; + void __rcu **srv_slot; + + /* Accept only local observers */ + if (from->sq_node != qrtr_ns.local_node) + return -EINVAL; + + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); + if (!lookup) + return -ENOMEM; + + lookup->sq = *from; + lookup->service = service; + lookup->instance = instance; + list_add_tail(&lookup->li, &qrtr_ns.lookups); + + memset(&filter, 0, sizeof(filter)); + filter.service = service; + filter.instance = instance; + + radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) { + node = radix_tree_deref_slot(node_slot); + + radix_tree_for_each_slot(srv_slot, &node->servers, + &srv_iter, 0) { + struct qrtr_server *srv; + + srv = radix_tree_deref_slot(srv_slot); + if (!server_match(srv, &filter)) + continue; + + lookup_notify(from, srv, true); + } + } + + /* Empty notification, to indicate end of listing */ + lookup_notify(from, NULL, true); + + return 0; +} + +static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, + unsigned int service, unsigned int instance) +{ + struct qrtr_lookup *lookup; + struct list_head *tmp; + struct list_head *li; + + list_for_each_safe(li, tmp, &qrtr_ns.lookups) { + lookup = container_of(li, struct qrtr_lookup, li); + if (lookup->sq.sq_node != from->sq_node) + continue; + if (lookup->sq.sq_port != from->sq_port) + continue; + if 
(lookup->service != service) + continue; + if (lookup->instance && lookup->instance != instance) + continue; + + list_del(&lookup->li); + kfree(lookup); + } +} + +static int say_hello(void) +{ + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct kvec iv; + int ret; + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_HELLO); + + msg.msg_name = (struct sockaddr *)&qrtr_ns.bcast_sq; + msg.msg_namelen = sizeof(qrtr_ns.bcast_sq); + + ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) + pr_err("failed to send hello msg\n"); + + return ret; +} + +static void qrtr_ns_worker(struct work_struct *work) +{ + const struct qrtr_ctrl_pkt *pkt; + size_t recv_buf_size = 4096; + struct sockaddr_qrtr sq; + struct msghdr msg = { }; + unsigned int cmd; + ssize_t msglen; + void *recv_buf; + struct kvec iv; + int ret; + + msg.msg_name = (struct sockaddr *)&sq; + msg.msg_namelen = sizeof(sq); + + recv_buf = kzalloc(recv_buf_size, GFP_KERNEL); + if (!recv_buf) + return; + + for (;;) { + iv.iov_base = recv_buf; + iv.iov_len = recv_buf_size; + + msglen = kernel_recvmsg(qrtr_ns.sock, &msg, &iv, 1, + iv.iov_len, MSG_DONTWAIT); + + if (msglen == -EAGAIN) + break; + + if (msglen < 0) { + pr_err("error receiving packet: %zd\n", msglen); + break; + } + + pkt = recv_buf; + cmd = le32_to_cpu(pkt->cmd); + if (cmd < ARRAY_SIZE(qrtr_ctrl_pkt_strings) && + qrtr_ctrl_pkt_strings[cmd]) + trace_printk("%s from %d:%d\n", + qrtr_ctrl_pkt_strings[cmd], sq.sq_node, + sq.sq_port); + + ret = 0; + switch (cmd) { + case QRTR_TYPE_HELLO: + ret = ctrl_cmd_hello(&sq); + break; + case QRTR_TYPE_BYE: + ret = ctrl_cmd_bye(&sq); + break; + case QRTR_TYPE_DEL_CLIENT: + ret = ctrl_cmd_del_client(&sq, + le32_to_cpu(pkt->client.node), + le32_to_cpu(pkt->client.port)); + break; + case QRTR_TYPE_NEW_SERVER: + ret = ctrl_cmd_new_server(&sq, + le32_to_cpu(pkt->server.service), + le32_to_cpu(pkt->server.instance), + le32_to_cpu(pkt->server.node), + le32_to_cpu(pkt->server.port)); + break; + case QRTR_TYPE_DEL_SERVER: + ret = ctrl_cmd_del_server(&sq, + le32_to_cpu(pkt->server.service), + le32_to_cpu(pkt->server.instance), + le32_to_cpu(pkt->server.node), + le32_to_cpu(pkt->server.port)); + break; + case QRTR_TYPE_EXIT: + case QRTR_TYPE_PING: + case QRTR_TYPE_RESUME_TX: + break; + case QRTR_TYPE_NEW_LOOKUP: + ret = ctrl_cmd_new_lookup(&sq, + le32_to_cpu(pkt->server.service), + le32_to_cpu(pkt->server.instance)); + break; + case QRTR_TYPE_DEL_LOOKUP: + ctrl_cmd_del_lookup(&sq, + le32_to_cpu(pkt->server.service), + le32_to_cpu(pkt->server.instance)); + break; + } + + if (ret < 0) + pr_err("failed while handling packet from %d:%d", + sq.sq_node, sq.sq_port); + } + + kfree(recv_buf); +} + +static void qrtr_ns_data_ready(struct sock *sk) +{ + queue_work(qrtr_ns.workqueue, &qrtr_ns.work); +} + +void qrtr_ns_init(struct work_struct *work) +{ + struct sockaddr_qrtr sq; + int ret; + + INIT_LIST_HEAD(&qrtr_ns.lookups); + INIT_WORK(&qrtr_ns.work, qrtr_ns_worker); + + ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, + PF_QIPCRTR, &qrtr_ns.sock); + if (ret < 0) + return; + + ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq); + if (ret < 0) { + pr_err("failed to get socket name\n"); + goto err_sock; + } + + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; + + sq.sq_port = QRTR_PORT_CTRL; + qrtr_ns.local_node = sq.sq_node; + + ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq)); + if (ret < 0) { + pr_err("failed to bind to 
socket\n"); + goto err_sock; + } + + qrtr_ns.bcast_sq.sq_family = AF_QIPCRTR; + qrtr_ns.bcast_sq.sq_node = QRTR_NODE_BCAST; + qrtr_ns.bcast_sq.sq_port = QRTR_PORT_CTRL; + + qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1); + if (!qrtr_ns.workqueue) + goto err_sock; + + ret = say_hello(); + if (ret < 0) + goto err_wq; + + return; + +err_wq: + destroy_workqueue(qrtr_ns.workqueue); +err_sock: + sock_release(qrtr_ns.sock); +} +EXPORT_SYMBOL_GPL(qrtr_ns_init); + +void qrtr_ns_remove(void) +{ + cancel_work_sync(&qrtr_ns.work); + destroy_workqueue(qrtr_ns.workqueue); + sock_release(qrtr_ns.sock); +} +EXPORT_SYMBOL_GPL(qrtr_ns_remove); + +MODULE_AUTHOR("Manivannan Sadhasivam "); +MODULE_DESCRIPTION("Qualcomm IPC Router Nameservice"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5a8e42ad1504..c758383ba866 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -110,6 +111,8 @@ static DEFINE_MUTEX(qrtr_node_lock); static DEFINE_IDR(qrtr_ports); static DEFINE_MUTEX(qrtr_port_lock); +static struct delayed_work qrtr_ns_work; + /** * struct qrtr_node - endpoint node * @ep_lock: lock for endpoint management and callbacks @@ -1241,38 +1244,6 @@ static int qrtr_create(struct net *net, struct socket *sock, return 0; } -static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { - [IFA_LOCAL] = { .type = NLA_U32 }, -}; - -static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb[IFA_MAX + 1]; - struct ifaddrmsg *ifm; - int rc; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!netlink_capable(skb, CAP_SYS_ADMIN)) - return -EPERM; - - ASSERT_RTNL(); - - rc = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, - qrtr_policy, extack); - if (rc < 0) - return rc; - - ifm = nlmsg_data(nlh); - if (!tb[IFA_LOCAL]) - return -EINVAL; - - qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); - return 0; -} - static const struct net_proto_family qrtr_family = { .owner = THIS_MODULE, .family = AF_QIPCRTR, @@ -1293,11 +1264,11 @@ static int __init qrtr_proto_init(void) return rc; } - rc = rtnl_register_module(THIS_MODULE, PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); - if (rc) { - sock_unregister(qrtr_family.family); - proto_unregister(&qrtr_proto); - } + /* FIXME: Currently, this 2s delay is required to catch the NEW_SERVER + * messages from routers. But the fix could be somewhere else. 
+ */ + INIT_DELAYED_WORK(&qrtr_ns_work, qrtr_ns_init); + schedule_delayed_work(&qrtr_ns_work, msecs_to_jiffies(2000)); return rc; } @@ -1305,7 +1276,8 @@ postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { - rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); + cancel_delayed_work_sync(&qrtr_ns_work); + qrtr_ns_remove(); sock_unregister(qrtr_family.family); proto_unregister(&qrtr_proto); } diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h index b81e6953c04b..53a237a28971 100644 --- a/net/qrtr/qrtr.h +++ b/net/qrtr/qrtr.h @@ -29,4 +29,8 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); +void qrtr_ns_init(struct work_struct *work); + +void qrtr_ns_remove(void); + #endif -- cgit v1.2.3 From 31d6cbeeb88010d59e70e4e0ae9a2b17edebb64e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 20:43:27 +0530 Subject: net: qrtr: Fix the local node ID as 1 In order to start the QRTR nameservice, the local node ID needs to be valid. Hence, fix it to 1. Previously, the node ID was configured through a userspace tool before starting the nameservice daemon. Since we have now integrated the nameservice handling to kernel, this change is necessary for making it functional. Signed-off-by: Manivannan Sadhasivam Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c758383ba866..423310896285 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -7,7 +7,6 @@ #include #include #include /* For TIOCINQ/OUTQ */ -#include #include #include #include @@ -97,7 +96,7 @@ static inline struct qrtr_sock *qrtr_sk(struct sock *sk) return container_of(sk, struct qrtr_sock, sk); } -static unsigned int qrtr_local_nid = NUMA_NO_NODE; +static unsigned int qrtr_local_nid = 1; /* for node ids */ static RADIX_TREE(qrtr_nodes, GFP_ATOMIC); -- cgit v1.2.3 From b8e202d1d1d0f182f01062804efb523ea9a9008c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:13 +0000 Subject: net, sk_msg: Annotate lockless access to sk_prot on clone sk_msg and ULP frameworks override protocol callbacks pointer in sk->sk_prot, while tcp accesses it locklessly when cloning the listening socket, that is with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if socket is getting cloned while being inserted/deleted to/from the sockmap from another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. 
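The annotation follows the usual lockless-pointer pattern; a minimal sketch of the pairing (the concrete hunks are below) is:

	/* write side, e.g. sk_psock_update_proto() */
	WRITE_ONCE(sk->sk_prot, ops);

	/* lockless read side, e.g. sk_clone_lock() / sock_copy() */
	const struct proto *prot = READ_ONCE(osk->sk_prot);
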
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com --- include/linux/skmsg.h | 3 ++- net/core/sock.c | 8 +++++--- net/ipv4/tcp_bpf.c | 4 +++- net/ipv4/tcp_ulp.c | 3 ++- net/tls/tls_main.c | 3 ++- 5 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d90ef61712a1..112765bd146d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk, psock->saved_write_space = sk->sk_write_space; psock->sk_proto = sk->sk_prot; - sk->sk_prot = ops; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, ops); } static inline void sk_psock_restore_proto(struct sock *sk, diff --git a/net/core/sock.c b/net/core/sock.c index a4c8fac781ff..bf1173b93eda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk) */ static void sock_copy(struct sock *nsk, const struct sock *osk) { + const struct proto *prot = READ_ONCE(osk->sk_prot); #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, - osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; @@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk) */ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { + struct proto *prot = READ_ONCE(sk->sk_prot); struct sock *newsk; bool is_charged = true; - newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); + newsk = sk_prot_alloc(prot, priority, sk->sk_family); if (newsk != NULL) { struct sk_filter *filter; sock_copy(newsk, sk); - newsk->sk_prot_creator = sk->sk_prot; + newsk->sk_prot_creator = prot; /* SANITY */ if (likely(newsk->sk_net_refcnt)) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a01428f80c1..dd183b050642 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -645,8 +645,10 @@ static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed * or added requiring sk_prot hook updates. We keep original saved * hooks in this case. + * + * Pairs with lockless read in sk_clone_lock(). */ - sk->sk_prot = &tcp_bpf_prots[family][config]; + WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); } static int tcp_bpf_assert_proto_ops(struct proto *ops) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 38d3ad141161..6c43fa189195 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, if (!icsk->icsk_ulp_ops) { sk->sk_write_space = write_space; - sk->sk_prot = proto; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, proto); return; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 94774c0e5ff3..82225bcc1117 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p, ctx->sk_write_space = write_space; ctx->sk_proto = p; } else { - sk->sk_prot = p; + /* Pairs with lockless read in sk_clone_lock(). 
*/ + WRITE_ONCE(sk->sk_prot, p); sk->sk_write_space = write_space; } } -- cgit v1.2.3 From f1ff5ce2cd5ef3335f19c0f6576582c87045b04f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:14 +0000 Subject: net, sk_msg: Clear sk_user_data pointer on clone if tagged sk_user_data can hold a pointer to an object that is not intended to be shared between the parent socket and the child that gets a pointer copy on clone. This is the case when sk_user_data points at reference-counted object, like struct sk_psock. One way to resolve it is to tag the pointer with a no-copy flag by repurposing its lowest bit. Based on the bit-flag value we clear the child sk_user_data pointer after cloning the parent socket. The no-copy flag is stored in the pointer itself as opposed to externally, say in socket flags, to guarantee that the pointer and the flag are copied from parent to child socket in an atomic fashion. Parent socket state is subject to change while copying, we don't hold any locks at that time. This approach relies on an assumption that sk_user_data holds a pointer to an object aligned at least 2 bytes. A manual audit of existing users of rcu_dereference_sk_user_data helper confirms our assumption. Also, an RCU-protected sk_user_data is not likely to hold a pointer to a char value or a pathological case of "struct { char c; }". To be safe, warn when the flag-bit is set when setting sk_user_data to catch any future misuses. It is worth considering why clearing sk_user_data unconditionally is not an option. There exist users, DRBD, NVMe, and Xen drivers being among them, that rely on the pointer being copied when cloning the listening socket. Potentially we could distinguish these users by checking if the listening socket has been created in kernel-space via sock_create_kern, and hence has sk_kern_sock flag set. However, this is not the case for NVMe and Xen drivers, which create sockets without marking them as belonging to the kernel. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200218171023.844439-3-jakub@cloudflare.com --- include/net/sock.h | 37 +++++++++++++++++++++++++++++++++++-- net/core/skmsg.c | 2 +- net/core/sock.c | 6 ++++++ 3 files changed, 42 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 02162b0378f7..9f37fdfd15d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -502,10 +502,43 @@ enum sk_pacing { SK_PACING_FQ = 2, }; +/* Pointer stored in sk_user_data might not be suitable for copying + * when cloning the socket. For instance, it can point to a reference + * counted object. sk_user_data bottom bit is set if pointer must not + * be copied. 
+ */ +#define SK_USER_DATA_NOCOPY 1UL +#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY) + +/** + * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied + * @sk: socket + */ +static inline bool sk_user_data_is_nocopy(const struct sock *sk) +{ + return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY); +} + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) -#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) -#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) +#define rcu_dereference_sk_user_data(sk) \ +({ \ + void *__tmp = rcu_dereference(__sk_user_data((sk))); \ + (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \ +}) +#define rcu_assign_sk_user_data(sk, ptr) \ +({ \ + uintptr_t __tmp = (uintptr_t)(ptr); \ + WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ + rcu_assign_pointer(__sk_user_data((sk)), __tmp); \ +}) +#define rcu_assign_sk_user_data_nocopy(sk, ptr) \ +({ \ + uintptr_t __tmp = (uintptr_t)(ptr); \ + WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ + rcu_assign_pointer(__sk_user_data((sk)), \ + __tmp | SK_USER_DATA_NOCOPY); \ +}) /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ded2d5227678..eeb28cb85664 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -512,7 +512,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); refcount_set(&psock->refcnt, 1); - rcu_assign_sk_user_data(sk, psock); + rcu_assign_sk_user_data_nocopy(sk, psock); sock_hold(sk); return psock; diff --git a/net/core/sock.c b/net/core/sock.c index bf1173b93eda..e4af4dbc1c9e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1865,6 +1865,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) goto out; } + /* Clear sk_user_data if parent had the pointer tagged + * as not suitable for copying when cloning. + */ + if (sk_user_data_is_nocopy(newsk)) + RCU_INIT_POINTER(newsk->sk_user_data, NULL); + newsk->sk_err = 0; newsk->sk_err_soft = 0; newsk->sk_priority = 0; -- cgit v1.2.3 From e80251555f0befd1271e74b080bccf0ff0348bfc Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:15 +0000 Subject: tcp_bpf: Don't let child socket inherit parent protocol ops on copy Prepare for cloning listening sockets that have their protocol callbacks overridden by sk_msg. Child sockets must not inherit parent callbacks that access state stored in sk_user_data owned by the parent. Restore the child socket protocol callbacks before it gets hashed and any of the callbacks can get invoked. 
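Conceptually, the child is first cloned as a byte-for-byte copy of the listener and only afterwards has its protocol callbacks put back. A simplified, standalone C model of that restore step is sketched below; the structs and names are illustrative stand-ins, not kernel code - the real helper (tcp_bpf_clone) is in the diff that follows.

  /* Toy model of restore-on-clone; stand-in types, illustrative only. */
  #include <stdio.h>

  struct proto { const char *name; };

  static struct proto tcp_default  = { "tcp" };
  static struct proto tcp_bpf_base = { "tcp_bpf" };

  struct sock {
      struct proto *sk_prot;          /* callbacks currently in use */
      struct proto *sk_prot_creator;  /* callbacks the socket started with */
  };

  static void clone_restore(const struct sock *parent, struct sock *child)
  {
      *child = *parent;                     /* clone copies the parent verbatim */
      if (child->sk_prot == &tcp_bpf_base)  /* parent was running BPF callbacks */
          child->sk_prot = parent->sk_prot_creator;
  }

  int main(void)
  {
      struct sock listener = { &tcp_bpf_base, &tcp_default };
      struct sock child;

      clone_restore(&listener, &child);
      printf("child callbacks: %s\n", child.sk_prot->name); /* prints "tcp" */
      return 0;
  }
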
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-4-jakub@cloudflare.com --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_bpf.c | 14 ++++++++++++++ net/ipv4/tcp_minisocks.c | 2 ++ 3 files changed, 23 insertions(+) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a5ea27df3c2b..07f947cc80e6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2203,6 +2203,13 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +#ifdef CONFIG_NET_SOCK_MSG +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#else +static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) +{ +} +#endif /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index dd183b050642..7d6e1b75d4d4 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -693,3 +693,17 @@ int tcp_bpf_init(struct sock *sk) rcu_read_unlock(); return 0; } + +/* If a child got cloned from a listening socket that had tcp_bpf + * protocol callbacks installed, we need to restore the callbacks to + * the default ones because the child does not inherit the psock state + * that tcp_bpf callbacks expect. + */ +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) +{ + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + struct proto *prot = newsk->sk_prot; + + if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) + newsk->sk_prot = sk->sk_prot_creator; +} diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ad3b56d9fa71..c8274371c3d0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); + tcp_bpf_clone(sk, newsk); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); return newsk; -- cgit v1.2.3 From 8ca30379a40103bf6734ae127ec940da798534dd Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:16 +0000 Subject: bpf, sockmap: Allow inserting listening TCP sockets into sockmap In order for sockmap/sockhash types to become generic collections for storing TCP sockets we need to loosen the checks during map update, while tightening the checks in redirect helpers. Currently sock{map,hash} require the TCP socket to be in established state, which prevents inserting listening sockets. Change the update pre-checks so the socket can also be in listening state. Since it doesn't make sense to redirect with sock{map,hash} to listening sockets, add appropriate socket state checks to BPF redirect helpers too. 
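From user space, inserting a listener then becomes a plain map update; a minimal sketch is below (assumes libbpf >= 0.7 for bpf_map_create(), error handling trimmed, loopback address and key 0 are arbitrary choices).

  /* Sketch: put a listening TCP socket into a SOCKMAP from user space. */
  #include <arpa/inet.h>
  #include <bpf/bpf.h>
  #include <netinet/in.h>
  #include <sys/socket.h>
  #include <unistd.h>

  int main(void)
  {
      struct sockaddr_in addr = {
          .sin_family = AF_INET,
          .sin_addr.s_addr = htonl(INADDR_LOOPBACK), /* port 0: kernel picks one */
      };
      int map_fd, sock_fd, key = 0;

      map_fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
                              sizeof(int), sizeof(int), 1, NULL);

      sock_fd = socket(AF_INET, SOCK_STREAM, 0);
      bind(sock_fd, (struct sockaddr *)&addr, sizeof(addr));
      listen(sock_fd, 128); /* socket is now in TCP_LISTEN */

      /* Accepted after this patch; previously rejected with -EOPNOTSUPP
       * because only TCP_ESTABLISHED sockets could be added. */
      if (bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY))
          return 1;

      close(sock_fd);
      close(map_fd);
      return 0;
  }
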
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-5-jakub@cloudflare.com --- net/core/sock_map.c | 59 ++++++++++++++++++++++++--------- tools/testing/selftests/bpf/test_maps.c | 6 +--- 2 files changed, 45 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3a7a96ab088a..dd92a3556d73 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -391,7 +391,8 @@ out_free: static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops) { return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB || - ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB; + ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB || + ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } static bool sock_map_sk_is_suitable(const struct sock *sk) @@ -400,6 +401,16 @@ static bool sock_map_sk_is_suitable(const struct sock *sk) sk->sk_protocol == IPPROTO_TCP; } +static bool sock_map_sk_state_allowed(const struct sock *sk) +{ + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); +} + +static bool sock_map_redirect_allowed(const struct sock *sk) +{ + return sk->sk_state != TCP_LISTEN; +} + static int sock_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -423,7 +434,7 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, } sock_map_sk_acquire(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (!sock_map_sk_state_allowed(sk)) ret = -EOPNOTSUPP; else ret = sock_map_update_common(map, idx, sk, flags); @@ -460,13 +471,17 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, struct bpf_map *, map, u32, key, u64, flags) { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct sock *sk; if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - tcb->bpf.flags = flags; - tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key); - if (!tcb->bpf.sk_redir) + + sk = __sock_map_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + tcb->bpf.flags = flags; + tcb->bpf.sk_redir = sk; return SK_PASS; } @@ -483,12 +498,17 @@ const struct bpf_func_proto bpf_sk_redirect_map_proto = { BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg, struct bpf_map *, map, u32, key, u64, flags) { + struct sock *sk; + if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - msg->flags = flags; - msg->sk_redir = __sock_map_lookup_elem(map, key); - if (!msg->sk_redir) + + sk = __sock_map_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + msg->flags = flags; + msg->sk_redir = sk; return SK_PASS; } @@ -748,7 +768,7 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key, } sock_map_sk_acquire(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (!sock_map_sk_state_allowed(sk)) ret = -EOPNOTSUPP; else ret = sock_hash_update_common(map, key, sk, flags); @@ -916,13 +936,17 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb, struct bpf_map *, map, void *, key, u64, flags) { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct sock *sk; if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - tcb->bpf.flags = flags; - tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key); - if (!tcb->bpf.sk_redir) + + sk = __sock_hash_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + tcb->bpf.flags = flags; + tcb->bpf.sk_redir = sk; return SK_PASS; } @@ -939,12 +963,17 @@ const struct bpf_func_proto bpf_sk_redirect_hash_proto = { BPF_CALL_4(bpf_msg_redirect_hash, 
struct sk_msg *, msg, struct bpf_map *, map, void *, key, u64, flags) { + struct sock *sk; + if (unlikely(flags & ~(BPF_F_INGRESS))) return SK_DROP; - msg->flags = flags; - msg->sk_redir = __sock_hash_lookup_elem(map, key); - if (!msg->sk_redir) + + sk = __sock_hash_lookup_elem(map, key); + if (unlikely(!sk || !sock_map_redirect_allowed(sk))) return SK_DROP; + + msg->flags = flags; + msg->sk_redir = sk; return SK_PASS; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 02eae1e864c2..c6766b2cff85 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data) /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (i < 2 && !err) { - printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n", - i, sfd[i]); - goto out_sockmap; - } else if (i >= 2 && err) { + if (err) { printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; -- cgit v1.2.3 From 6e830c2f6c9641217e22330cec1372acff78dcef Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:17 +0000 Subject: bpf, sockmap: Don't set up upcalls and progs for listening sockets Now that sockmap/sockhash can hold listening sockets, when setting up the psock we will (i) grab references to verdict/parser progs, and (2) override socket upcalls sk_data_ready and sk_write_space. However, since we cannot redirect to listening sockets so we don't need to link the socket to the BPF progs. And more importantly we don't want the listening socket to have overridden upcalls because they would get inherited by child sockets cloned from it. Introduce a separate initialization path for listening sockets that does not change the upcalls and ignores the BPF progs. 
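To see why inherited upcalls would be a problem, consider the toy model below: a cloned child starts as a byte copy of the listener, so an overridden sk_data_ready would run psock code on a socket that owns no psock. Types and flow are illustrative only, not kernel code.

  /* Toy model: why a listener must keep its default callbacks. */
  #include <stdio.h>

  struct sock {
      void (*sk_data_ready)(struct sock *sk);
      void *psock;   /* reference-counted state, owned by the parent only */
  };

  static void default_data_ready(struct sock *sk) { (void)sk; }

  static void psock_data_ready(struct sock *sk)
  {
      /* real code would dereference sk->psock here */
      printf("psock path, psock=%p\n", sk->psock);
  }

  int main(void)
  {
      struct sock listener = { default_data_ready, NULL };
      struct sock child;

      listener.sk_data_ready = psock_data_ready; /* the override this patch avoids */
      listener.psock = (void *)0x1;

      child = listener;             /* clone copies the callback pointer */
      child.psock = NULL;           /* sk_user_data is cleared on clone (earlier patch) */
      child.sk_data_ready(&child);  /* child would run psock code without a psock */
      return 0;
  }
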
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-6-jakub@cloudflare.com --- net/core/sock_map.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index dd92a3556d73..a5103112a344 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -228,6 +228,30 @@ out: return ret; } +static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) +{ + struct sk_psock *psock; + int ret; + + psock = sk_psock_get_checked(sk); + if (IS_ERR(psock)) + return PTR_ERR(psock); + + if (psock) { + tcp_bpf_reinit(sk); + return 0; + } + + psock = sk_psock_init(sk, map->numa_node); + if (!psock) + return -ENOMEM; + + ret = tcp_bpf_init(sk); + if (ret < 0) + sk_psock_put(sk, psock); + return ret; +} + static void sock_map_free(struct bpf_map *map) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); @@ -334,6 +358,11 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next) return 0; } +static bool sock_map_redirect_allowed(const struct sock *sk) +{ + return sk->sk_state != TCP_LISTEN; +} + static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) { @@ -356,7 +385,14 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, if (!link) return -ENOMEM; - ret = sock_map_link(map, &stab->progs, sk); + /* Only sockets we can redirect into/from in BPF need to hold + * refs to parser/verdict progs and have their sk_data_ready + * and sk_write_space callbacks overridden. + */ + if (sock_map_redirect_allowed(sk)) + ret = sock_map_link(map, &stab->progs, sk); + else + ret = sock_map_link_no_progs(map, sk); if (ret < 0) goto out_free; @@ -406,11 +442,6 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); } -static bool sock_map_redirect_allowed(const struct sock *sk) -{ - return sk->sk_state != TCP_LISTEN; -} - static int sock_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -700,7 +731,14 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, if (!link) return -ENOMEM; - ret = sock_map_link(map, &htab->progs, sk); + /* Only sockets we can redirect into/from in BPF need to hold + * refs to parser/verdict progs and have their sk_data_ready + * and sk_write_space callbacks overridden. + */ + if (sock_map_redirect_allowed(sk)) + ret = sock_map_link(map, &htab->progs, sk); + else + ret = sock_map_link_no_progs(map, sk); if (ret < 0) goto out_free; -- cgit v1.2.3 From c1cdf65da060a8e047a9f4433306fd6dac1f51a6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:18 +0000 Subject: bpf, sockmap: Return socket cookie on lookup from syscall Tooling that populates the SOCK{MAP,HASH} with sockets from user-space needs a way to inspect its contents. Returning the struct sock * that the map holds to user-space is neither safe nor useful. An approach established by REUSEPORT_SOCKARRAY is to return a socket cookie (a unique identifier) instead. Since socket cookies are u64 values, SOCK{MAP,HASH} need to support such a value size for lookup to be possible. This requires special handling on update, though. Attempts to do a lookup on a map holding u32 values will be met with ENOSPC error. 
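In user space the lookup then looks like the sketch below: the map has to be created with 8-byte values, and the value read back is the socket cookie rather than a kernel pointer (assumes libbpf >= 0.7; function and variable names are illustrative).

  /* Sketch: read a socket cookie out of a SOCKMAP via the bpf() syscall path. */
  #include <bpf/bpf.h>
  #include <stdio.h>

  int lookup_cookie(int listener_fd)
  {
      __u64 cookie = 0, val;
      __u32 key = 0;
      int map_fd;

      /* 8-byte values: required for syscall-side lookups to succeed;
       * a 4-byte map returns -ENOSPC on lookup instead. */
      map_fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
                              sizeof(__u32), sizeof(__u64), 1, NULL);

      val = listener_fd; /* updates still take a socket fd, widened to u64 */
      if (bpf_map_update_elem(map_fd, &key, &val, BPF_ANY))
          return -1;

      if (bpf_map_lookup_elem(map_fd, &key, &cookie))
          return -1;

      printf("socket cookie: %llu\n", (unsigned long long)cookie);
      return 0;
  }
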
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-7-jakub@cloudflare.com --- net/core/sock_map.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a5103112a344..f48c934d5da0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -10,6 +10,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -31,7 +32,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size != 4 || - attr->value_size != 4 || + (attr->value_size != sizeof(u32) && + attr->value_size != sizeof(u64)) || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); @@ -302,6 +304,21 @@ static void *sock_map_lookup(struct bpf_map *map, void *key) return ERR_PTR(-EOPNOTSUPP); } +static void *sock_map_lookup_sys(struct bpf_map *map, void *key) +{ + struct sock *sk; + + if (map->value_size != sizeof(u64)) + return ERR_PTR(-ENOSPC); + + sk = __sock_map_lookup_elem(map, *(u32 *)key); + if (!sk) + return ERR_PTR(-ENOENT); + + sock_gen_cookie(sk); + return &sk->sk_cookie; +} + static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock **psk) { @@ -445,11 +462,18 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) static int sock_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { - u32 ufd = *(u32 *)value; u32 idx = *(u32 *)key; struct socket *sock; struct sock *sk; int ret; + u64 ufd; + + if (map->value_size == sizeof(u64)) + ufd = *(u64 *)value; + else + ufd = *(u32 *)value; + if (ufd > S32_MAX) + return -EINVAL; sock = sockfd_lookup(ufd, &ret); if (!sock) @@ -557,6 +581,7 @@ const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, .map_get_next_key = sock_map_get_next_key, + .map_lookup_elem_sys_only = sock_map_lookup_sys, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, .map_lookup_elem = sock_map_lookup, @@ -787,10 +812,17 @@ out_free: static int sock_hash_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { - u32 ufd = *(u32 *)value; struct socket *sock; struct sock *sk; int ret; + u64 ufd; + + if (map->value_size == sizeof(u64)) + ufd = *(u64 *)value; + else + ufd = *(u32 *)value; + if (ufd > S32_MAX) + return -EINVAL; sock = sockfd_lookup(ufd, &ret); if (!sock) @@ -866,7 +898,8 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size == 0 || - attr->value_size != 4 || + (attr->value_size != sizeof(u32) && + attr->value_size != sizeof(u64)) || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); if (attr->key_size > MAX_BPF_STACK) @@ -943,6 +976,21 @@ static void sock_hash_free(struct bpf_map *map) kfree(htab); } +static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) +{ + struct sock *sk; + + if (map->value_size != sizeof(u64)) + return ERR_PTR(-ENOSPC); + + sk = __sock_hash_lookup_elem(map, key); + if (!sk) + return ERR_PTR(-ENOENT); + + sock_gen_cookie(sk); + return &sk->sk_cookie; +} + static void sock_hash_release_progs(struct bpf_map *map) { psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs); @@ -1032,6 +1080,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_update_elem = sock_hash_update_elem, .map_delete_elem = 
sock_hash_delete_elem, .map_lookup_elem = sock_map_lookup, + .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, }; -- cgit v1.2.3 From 1d59f3bcee356caa933646dc45ff0836455535e8 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:19 +0000 Subject: bpf, sockmap: Let all kernel-land lookup values in SOCKMAP/SOCKHASH Don't require the kernel code, like BPF helpers, that needs access to SOCK{MAP,HASH} map contents to live in net/core/sock_map.c. Expose the lookup operation to all kernel-land. Lookup from BPF context is not whitelisted yet. While syscalls have a dedicated lookup handler. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200218171023.844439-8-jakub@cloudflare.com --- net/core/sock_map.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f48c934d5da0..2e0f465295c3 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -301,7 +301,7 @@ static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) static void *sock_map_lookup(struct bpf_map *map, void *key) { - return ERR_PTR(-EOPNOTSUPP); + return __sock_map_lookup_elem(map, *(u32 *)key); } static void *sock_map_lookup_sys(struct bpf_map *map, void *key) @@ -991,6 +991,11 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) return &sk->sk_cookie; } +static void *sock_hash_lookup(struct bpf_map *map, void *key) +{ + return __sock_hash_lookup_elem(map, key); +} + static void sock_hash_release_progs(struct bpf_map *map) { psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs); @@ -1079,7 +1084,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_get_next_key = sock_hash_get_next_key, .map_update_elem = sock_hash_update_elem, .map_delete_elem = sock_hash_delete_elem, - .map_lookup_elem = sock_map_lookup, + .map_lookup_elem = sock_hash_lookup, .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, -- cgit v1.2.3 From 9fed9000c5c6cacfcaaa48aff74818072ae294cc Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:20 +0000 Subject: bpf: Allow selecting reuseport socket from a SOCKMAP/SOCKHASH SOCKMAP & SOCKHASH now support storing references to listening sockets. Nothing keeps us from using these map types a collection of sockets to select from in BPF reuseport programs. Whitelist the map types with the bpf_sk_select_reuseport helper. The restriction that the socket has to be a member of a reuseport group still applies. Sockets in SOCKMAP/SOCKHASH that don't have sk_reuseport_cb set are not a valid target and we signal it with -EINVAL. The main benefit from this change is that, in contrast to REUSEPORT_SOCKARRAY, SOCK{MAP,HASH} don't impose a restriction that a listening socket can be just one BPF map at the same time. 
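A minimal BPF-side sketch of such a program is below (BTF-defined map and section names as used with libbpf; map size, key and license string are illustrative).

  /* Sketch: sk_reuseport program picking the target socket from a SOCKMAP. */
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
      __uint(type, BPF_MAP_TYPE_SOCKMAP);
      __uint(max_entries, 1);
      __uint(key_size, sizeof(__u32));
      __uint(value_size, sizeof(__u64));
  } redir_map SEC(".maps");

  SEC("sk_reuseport")
  int select_from_sockmap(struct sk_reuseport_md *reuse_md)
  {
      __u32 key = 0;

      /* Fails with -EINVAL if the stored socket is not part of this
       * reuseport group, -ENOENT if the slot is empty. */
      if (bpf_sk_select_reuseport(reuse_md, &redir_map, &key, 0))
          return SK_DROP;

      return SK_PASS;
  }

  char _license[] SEC("license") = "GPL";
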
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-9-jakub@cloudflare.com --- kernel/bpf/verifier.c | 10 +++++++--- net/core/filter.c | 15 ++++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1cc945daa9c8..6d15dfbd4b88 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3693,14 +3693,16 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update && func_id != BPF_FUNC_map_delete_elem && - func_id != BPF_FUNC_msg_redirect_map) + func_id != BPF_FUNC_msg_redirect_map && + func_id != BPF_FUNC_sk_select_reuseport) goto error; break; case BPF_MAP_TYPE_SOCKHASH: if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update && func_id != BPF_FUNC_map_delete_elem && - func_id != BPF_FUNC_msg_redirect_hash) + func_id != BPF_FUNC_msg_redirect_hash && + func_id != BPF_FUNC_sk_select_reuseport) goto error; break; case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: @@ -3774,7 +3776,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, goto error; break; case BPF_FUNC_sk_select_reuseport: - if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) + if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && + map->map_type != BPF_MAP_TYPE_SOCKMAP && + map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; case BPF_FUNC_map_peek_elem: diff --git a/net/core/filter.c b/net/core/filter.c index c180871e606d..77d2f471b3bb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8620,6 +8620,7 @@ struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, struct bpf_map *, map, void *, key, u32, flags) { + bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; @@ -8628,12 +8629,16 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, return -ENOENT; reuse = rcu_dereference(selected_sk->sk_reuseport_cb); - if (!reuse) - /* selected_sk is unhashed (e.g. by close()) after the - * above map_lookup_elem(). Treat selected_sk has already - * been removed from the map. + if (!reuse) { + /* reuseport_array has only sk with non NULL sk_reuseport_cb. + * The only (!reuse) case here is - the sk has already been + * unhashed (e.g. by close()), so treat it as -ENOENT. + * + * Other maps (e.g. sock_map) do not provide this guarantee and + * the sk may never be in the reuseport group to begin with. */ - return -ENOENT; + return is_sockarray ? -ENOENT : -EINVAL; + } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk; -- cgit v1.2.3 From 035ff358f2d9e2f5e1639ba4defe4dc40ac642dd Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:21 +0000 Subject: net: Generate reuseport group ID on group creation Commit 736b46027eb4 ("net: Add ID (if needed) to sock_reuseport and expose reuseport_lock") has introduced lazy generation of reuseport group IDs that survive group resize. By comparing the identifier we check if BPF reuseport program is not trying to select a socket from a BPF map that belongs to a different reuseport group than the one the packet is for. 
Because SOCKARRAY used to be the only BPF map type that can be used with reuseport BPF, it was possible to delay the generation of reuseport group ID until a socket from the group was inserted into BPF map for the first time. Now that SOCK{MAP,HASH} can be used with reuseport BPF we have two options, either generate the reuseport ID on map update, like SOCKARRAY does, or allocate an ID from the start when reuseport group gets created. This patch takes the latter approach to keep sockmap free of calls into reuseport code. This streamlines the reuseport_id access as its lifetime now matches the longevity of reuseport object. The cost of this simplification, however, is that we allocate reuseport IDs for all SO_REUSEPORT users. Even those that don't use SOCKARRAY in their setups. With the way identifiers are currently generated, we can have at most S32_MAX reuseport groups, which hopefully is sufficient. If we ever get close to the limit, we can switch an u64 counter like sk_cookie. Another change is that we now always call into SOCKARRAY logic to unlink the socket from the map when unhashing or closing the socket. Previously we did it only when at least one socket from the group was in a BPF map. It is worth noting that this doesn't conflict with sockmap tear-down in case a socket is in a SOCK{MAP,HASH} and belongs to a reuseport group. sockmap tear-down happens first: prot->unhash `- tcp_bpf_unhash |- tcp_bpf_remove | `- while (sk_psock_link_pop(psock)) | `- sk_psock_unlink | `- sock_map_delete_from_link | `- __sock_map_delete | `- sock_map_unref | `- sk_psock_put | `- sk_psock_drop | `- rcu_assign_sk_user_data(sk, NULL) `- inet_unhash `- reuseport_detach_sock `- bpf_sk_reuseport_detach `- WRITE_ONCE(sk->sk_user_data, NULL) Suggested-by: Martin Lau Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-10-jakub@cloudflare.com --- include/net/sock_reuseport.h | 2 -- kernel/bpf/reuseport_array.c | 5 ----- net/core/filter.c | 12 +---------- net/core/sock_reuseport.c | 50 +++++++++++++++++++------------------------- 4 files changed, 22 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 43f4a818d88f..3ecaa15d1850 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -55,6 +55,4 @@ static inline bool reuseport_has_conns(struct sock *sk, bool set) return ret; } -int reuseport_get_id(struct sock_reuseport *reuse); - #endif /* _SOCK_REUSEPORT_H */ diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 50c083ba978c..01badd3eda7a 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -305,11 +305,6 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, if (err) goto put_file_unlock; - /* Ensure reuse->reuseport_id is set */ - err = reuseport_get_id(reuse); - if (err < 0) - goto put_file_unlock; - WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]); rcu_assign_pointer(array->ptrs[index], nsk); free_osk = osk; diff --git a/net/core/filter.c b/net/core/filter.c index 77d2f471b3bb..925b23de218b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8641,18 +8641,8 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { - struct sock *sk; - - if (unlikely(!reuse_kern->reuseport_id)) - /* There is a small race between adding the - * sk to the map and setting the - * 
reuse_kern->reuseport_id. - * Treat it as the sk has not been added to - * the bpf map yet. - */ - return -ENOENT; + struct sock *sk = reuse_kern->sk; - sk = reuse_kern->sk; if (sk->sk_protocol != selected_sk->sk_protocol) return -EPROTOTYPE; else if (sk->sk_family != selected_sk->sk_family) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 91e9f2223c39..adcb3aea576d 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -16,27 +16,8 @@ DEFINE_SPINLOCK(reuseport_lock); -#define REUSEPORT_MIN_ID 1 static DEFINE_IDA(reuseport_ida); -int reuseport_get_id(struct sock_reuseport *reuse) -{ - int id; - - if (reuse->reuseport_id) - return reuse->reuseport_id; - - id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0, - /* Called under reuseport_lock */ - GFP_ATOMIC); - if (id < 0) - return id; - - reuse->reuseport_id = id; - - return reuse->reuseport_id; -} - static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { unsigned int size = sizeof(struct sock_reuseport) + @@ -55,6 +36,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) int reuseport_alloc(struct sock *sk, bool bind_inany) { struct sock_reuseport *reuse; + int id, ret = 0; /* bh lock used since this function call may precede hlist lock in * soft irq of receive path or setsockopt from process context @@ -78,10 +60,18 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { - spin_unlock_bh(&reuseport_lock); - return -ENOMEM; + ret = -ENOMEM; + goto out; } + id = ida_alloc(&reuseport_ida, GFP_ATOMIC); + if (id < 0) { + kfree(reuse); + ret = id; + goto out; + } + + reuse->reuseport_id = id; reuse->socks[0] = sk; reuse->num_socks = 1; reuse->bind_inany = bind_inany; @@ -90,7 +80,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) out: spin_unlock_bh(&reuseport_lock); - return 0; + return ret; } EXPORT_SYMBOL(reuseport_alloc); @@ -134,8 +124,7 @@ static void reuseport_free_rcu(struct rcu_head *head) reuse = container_of(head, struct sock_reuseport, rcu); sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1)); - if (reuse->reuseport_id) - ida_simple_remove(&reuseport_ida, reuse->reuseport_id); + ida_free(&reuseport_ida, reuse->reuseport_id); kfree(reuse); } @@ -199,12 +188,15 @@ void reuseport_detach_sock(struct sock *sk) reuse = rcu_dereference_protected(sk->sk_reuseport_cb, lockdep_is_held(&reuseport_lock)); - /* At least one of the sk in this reuseport group is added to - * a bpf map. Notify the bpf side. The bpf map logic will - * remove the sk if it is indeed added to a bpf map. + /* Notify the bpf side. The sk may be added to a sockarray + * map. If so, sockarray logic will remove it from the map. + * + * Other bpf map types that work with reuseport, like sockmap, + * don't need an explicit callback from here. They override sk + * unhash/close ops to remove the sk from the map before we + * get to this point. */ - if (reuse->reuseport_id) - bpf_sk_reuseport_detach(sk); + bpf_sk_reuseport_detach(sk); rcu_assign_pointer(sk->sk_reuseport_cb, NULL); -- cgit v1.2.3 From c3e042f54107376cfbbd215e11878a2a75e1e228 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 23 Feb 2020 16:05:43 +0800 Subject: igmp: remove unused macro IGMP_Vx_UNSOLICITED_REPORT_INTERVAL After commit 2690048c01f3 ("net: igmp: Allow user-space configuration of igmp unsolicited report interval"), they are not used now Signed-off-by: Li RongQing Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3b9c7a2725a9..47f0502b2101 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,8 +107,6 @@ #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ -#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ) -#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ) #define IGMP_QUERY_INTERVAL (125*HZ) #define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ) -- cgit v1.2.3 From 8079e4fee5639cc01374cc2d2ea3f952598896b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 10:19:11 +0100 Subject: Revert "mac80211: support NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS" This reverts commit 9b125c27998719288e4dcf2faf54511039526692. As Jouni points out, there's really no need for this, since the RSN pre-authentication frames are normal data frames, not port control frames (locally). Fixes: 9b125c279987 ("mac80211: support NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200224101910.b87da63a3cd6.Ic94bc51a370c4aa7d19fbca9b96d90ab703257dc@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 -- net/mac80211/tx.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index cae3a34d3503..944e86da5c65 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -589,8 +589,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); - wiphy_ext_feature_set(wiphy, - NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS); if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8dd93072f6e6..2645a39cce88 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5315,7 +5315,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, ehdr = skb_push(skb, sizeof(struct ethhdr)); memcpy(ehdr->h_dest, dest, ETH_ALEN); - memcpy(ehdr->h_source, src, ETH_ALEN); + memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN); ehdr->h_proto = proto; skb->dev = dev; -- cgit v1.2.3 From 8d74a623cc3cecda89da628b8f3d115d8cf1ee8f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 10:19:12 +0100 Subject: Revert "nl80211: add src and dst addr attributes for control port tx/rx" This reverts commit 8c3ed7aa2b9ef666195b789e9b02e28383243fa8. As Jouni points out, there's really no need for this, since the RSN pre-authentication frames are normal data frames, not port control frames (locally). We can still revert this now since it hasn't actually gone beyond -next. 
Fixes: 8c3ed7aa2b9e ("nl80211: add src and dst addr attributes for control port tx/rx") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200224101910.b746e263287a.I9eb15d6895515179d50964dec3550c9dc784bb93@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +-- include/uapi/linux/nl80211.h | 16 +--------------- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/tx.c | 3 +-- net/wireless/nl80211.c | 18 +++--------------- net/wireless/rdev-ops.h | 8 ++++---- net/wireless/trace.h | 27 ++++++++++----------------- 7 files changed, 21 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7ea23caa7301..089ffdd6dba5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3974,8 +3974,7 @@ struct cfg80211_ops { int (*tx_control_port)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, - const __be16 proto, + const u8 *dest, const __be16 proto, const bool noencrypt); int (*get_ftm_responder_stats)(struct wiphy *wiphy, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 158bccb4a47b..350ab86fe20e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1039,14 +1039,11 @@ * a control port frame and as a notification that a control port frame * has been received. %NL80211_ATTR_FRAME is used to specify the * frame contents. The frame is the raw EAPoL data, without ethernet or - * 802.11 headers. An optional %NL80211_ATTR_SRC_MAC can be used to send - * pre-auth frames to STAs on behalf of other APs. + * 802.11 headers. * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added * indicating the protocol type of the received frame; whether the frame * was received unencrypted and the MAC address of the peer respectively. - * %NL80211_ATTR_DST_MAC can be used to forward pre-auth frames in - * userspace while using AP mode. * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * @@ -2412,9 +2409,6 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * - * @NL80211_ATTR_SRC_MAC: MAC address used in control port over nl80211 transmit - * @NL80211_ATTR_DST_MAC: MAC address used in control port over nl80211 receive - * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2883,9 +2877,6 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, - NL80211_ATTR_SRC_MAC, - NL80211_ATTR_DST_MAC, - /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5548,10 +5539,6 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * - * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS: The driver - * can use src and dst MAC addresses with control port over nl80211 rx - * and tx operations. - * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
*/ @@ -5599,7 +5586,6 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, - NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index da9eaa9ee37e..8a49d78ad7c9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1792,8 +1792,7 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, - bool unencrypted); + const u8 *dest, __be16 proto, bool unencrypted); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2645a39cce88..cddaacaa31a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5283,8 +5283,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, - bool unencrypted) + const u8 *dest, __be16 proto, bool unencrypted) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f0112dabe21e..d795552f97b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -634,8 +634,6 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), - [NL80211_ATTR_SRC_MAC] = NLA_POLICY_ETH_ADDR, - [NL80211_ATTR_DST_MAC] = NLA_POLICY_ETH_ADDR, }; /* policy for the key attributes */ @@ -13698,7 +13696,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) const u8 *buf; size_t len; u8 *dest; - u8 src[ETH_ALEN]; u16 proto; bool noencrypt; int err; @@ -13736,13 +13733,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) goto out; } - /* copy src address under wdev_lock, as we may copy wdev_address */ - if (info->attrs[NL80211_ATTR_SRC_MAC]) - ether_addr_copy(src, - nla_data(info->attrs[NL80211_ATTR_SRC_MAC])); - else - ether_addr_copy(src, wdev_address(wdev)); - wdev_unlock(wdev); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); @@ -13753,7 +13743,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); return rdev_tx_control_port(rdev, dev, buf, len, - dest, src, cpu_to_be16(proto), noencrypt); + dest, cpu_to_be16(proto), noencrypt); out: wdev_unlock(wdev); @@ -16010,8 +16000,7 @@ static int __nl80211_rx_control_port(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ethhdr *ehdr = eth_hdr(skb); - const u8 *daddr = ehdr->h_dest; - const u8 *saddr = ehdr->h_source; + const u8 *addr = ehdr->h_source; u16 proto = be16_to_cpu(skb->protocol); struct sk_buff *msg; void *hdr; @@ -16036,8 +16025,7 @@ static int __nl80211_rx_control_port(struct net_device *dev, nla_put_u32(msg, 
NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, saddr) || - nla_put(msg, NL80211_ATTR_DST_MAC, ETH_ALEN, daddr) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 5ea34c1b60fe..e0d34f796d0b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -734,14 +734,14 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, - const u8 *dest, const u8 *src, - __be16 proto, const bool noencrypt) + const u8 *dest, __be16 proto, + const bool noencrypt) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, - dest, src, proto, noencrypt); + dest, proto, noencrypt); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, - dest, src, proto, noencrypt); + dest, proto, noencrypt); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b6b60e3aea41..3ef1679b0e66 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1928,31 +1928,27 @@ TRACE_EVENT(rdev_mgmt_tx, TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, + const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted), - TP_ARGS(wiphy, netdev, buf, len, dest, src, proto, unencrypted), + TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) - MAC_ENTRY(src) - __field(u16, proto) + __field(__be16, proto) __field(bool, unencrypted) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); - MAC_ASSIGN(src, src); - __entry->proto = be16_to_cpu(proto); + __entry->proto = proto; __entry->unencrypted = unencrypted; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", dest: " MAC_PR_FMT - ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - WIPHY_PR_ARG, NETDEV_PR_ARG, - MAC_PR_ARG(dest), MAC_PR_ARG(src), - __entry->proto, + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT "," + " proto: 0x%x, unencrypted: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest), + be16_to_cpu(__entry->proto), BOOL_TO_STR(__entry->unencrypted)) ); @@ -2844,7 +2840,6 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) - MAC_ENTRY(to) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) @@ -2852,14 +2847,12 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_fast_assign( NETDEV_ASSIGN; __entry->len = skb->len; - MAC_ASSIGN(to, eth_hdr(skb)->h_dest); MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; ), - TP_printk(NETDEV_PR_FMT ", len=%d, dest: " MAC_PR_FMT - ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(to), MAC_PR_ARG(from), + TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from), __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); -- cgit v1.2.3 From f8af764bf1cb86bed615baae84e3bbb6e4d3912d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 
15:25:42 +0200 Subject: cfg80211: More error messages for key addition failures These were helpful while working with extensions to NL80211_CMD_NEW_KEY, so add more explicit error reporting for additional cases that can fail while that command is being processed. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-1-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d795552f97b2..ce55a2c05fe9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3978,8 +3978,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!key.p.key) + if (!key.p.key) { + GENL_SET_ERR_MSG(info, "no key"); return -EINVAL; + } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3993,8 +3995,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) /* for now */ if (key.type != NL80211_KEYTYPE_PAIRWISE && - key.type != NL80211_KEYTYPE_GROUP) + key.type != NL80211_KEYTYPE_GROUP) { + GENL_SET_ERR_MSG(info, "key type not pairwise or group"); return -EINVAL; + } if (key.type == NL80211_KEYTYPE_GROUP && info->attrs[NL80211_ATTR_VLAN_ID]) @@ -4005,15 +4009,22 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, - mac_addr)) + mac_addr)) { + GENL_SET_ERR_MSG(info, "key setting validation failed"); return -EINVAL; + } wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); - if (!err) + if (err) + GENL_SET_ERR_MSG(info, "key not allowed"); + if (!err) { err = rdev_add_key(rdev, dev, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); + if (err) + GENL_SET_ERR_MSG(info, "key addition failed"); + } wdev_unlock(dev->ieee80211_ptr); return err; -- cgit v1.2.3 From 56be393fa8b40db2d4f54f97614f645eb8d3c32e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:43 +0200 Subject: cfg80211: Support key configuration for Beacon protection (BIGTK) IEEE P802.11-REVmd/D3.0 adds support for protecting Beacon frames using a new set of keys (BIGTK; key index 6..7) similarly to the way group-addressed Robust Management frames are protected (IGTK; key index 4..5). Extend cfg80211 and nl80211 to allow the new BIGTK to be configured. Add an extended feature flag to indicate driver support for the new key index values to avoid array overflows in driver implementations and also to indicate to user space when this functionality is available. 
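The resulting key index layout is easy to summarize; the small helper below only illustrates the ranges involved (0-3 for pairwise/group data keys, 4-5 for the IGTK, 6-7 for the new BIGTK) and is not kernel code.

  /* Illustration of the key index ranges after this change. */
  enum key_kind { KEY_DATA, KEY_IGTK, KEY_BIGTK, KEY_INVALID };

  static enum key_kind classify_key_idx(int idx, int has_beacon_prot)
  {
      if (idx >= 0 && idx <= 3)
          return KEY_DATA;     /* pairwise or group data keys */
      if (idx == 4 || idx == 5)
          return KEY_IGTK;     /* group management frame keys */
      if ((idx == 6 || idx == 7) && has_beacon_prot)
          return KEY_BIGTK;    /* Beacon protection keys */
      return KEY_INVALID;      /* rejected, as in cfg80211_validate_key_settings() */
  }
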
Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-2-jouni@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 ++++ include/uapi/linux/nl80211.h | 6 +++++ net/wireless/nl80211.c | 56 +++++++++++++++++++++++++++++++++++--------- net/wireless/rdev-ops.h | 13 ++++++++++ net/wireless/sme.c | 11 +++++++-- net/wireless/trace.h | 17 ++++++++++++++ net/wireless/util.c | 7 +++++- 7 files changed, 101 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 682ee4dd6963..04b6df15706c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3369,6 +3369,8 @@ struct cfg80211_update_owe_info { * @set_default_key: set the default key on an interface * * @set_default_mgmt_key: set the default management frame key on an interface + + * @set_default_beacon_key: set the default Beacon frame key on an interface * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * @@ -3702,6 +3704,9 @@ struct cfg80211_ops { int (*set_default_mgmt_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); + int (*set_default_beacon_key)(struct wiphy *wiphy, + struct net_device *netdev, + u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 350ab86fe20e..934e62fe8705 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4550,6 +4550,7 @@ enum nl80211_key_default_types { * See &enum nl80211_key_default_types. * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode. * Defaults to @NL80211_KEY_RX_TX. + * @NL80211_KEY_DEFAULT_BEACON: flag indicating default Beacon frame key * * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute @@ -4565,6 +4566,7 @@ enum nl80211_key_attributes { NL80211_KEY_TYPE, NL80211_KEY_DEFAULT_TYPES, NL80211_KEY_MODE, + NL80211_KEY_DEFAULT_BEACON, /* keep last */ __NL80211_KEY_AFTER_LAST, @@ -5539,6 +5541,9 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * + * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection + * and can receive key configuration for BIGTK using key indexes 6 and 7. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. 
*/ @@ -5586,6 +5591,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, + NL80211_EXT_FEATURE_BEACON_PROTECTION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ce55a2c05fe9..a75f72288139 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -368,7 +368,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, - [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 5), + [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 7), [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, @@ -1037,7 +1037,7 @@ struct key_parse { struct key_params p; int idx; int type; - bool def, defmgmt; + bool def, defmgmt, defbeacon; bool def_uni, def_multi; }; @@ -1053,12 +1053,13 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, k->def = !!tb[NL80211_KEY_DEFAULT]; k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + k->defbeacon = !!tb[NL80211_KEY_DEFAULT_BEACON]; if (k->def) { k->def_uni = true; k->def_multi = true; } - if (k->defmgmt) + if (k->defmgmt || k->defbeacon) k->def_multi = true; if (tb[NL80211_KEY_IDX]) @@ -1165,14 +1166,17 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) if (err) return err; - if (k->def && k->defmgmt) { - GENL_SET_ERR_MSG(info, "key with def && defmgmt is invalid"); + if ((k->def ? 1 : 0) + (k->defmgmt ? 1 : 0) + + (k->defbeacon ? 1 : 0) > 1) { + GENL_SET_ERR_MSG(info, + "key with multiple default flags is invalid"); return -EINVAL; } - if (k->defmgmt) { + if (k->defmgmt || k->defbeacon) { if (k->def_uni || !k->def_multi) { - GENL_SET_ERR_MSG(info, "defmgmt key must be mcast"); + GENL_SET_ERR_MSG(info, + "defmgmt/defbeacon key must be mcast"); return -EINVAL; } } @@ -1184,14 +1188,20 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) "defmgmt key idx not 4 or 5"); return -EINVAL; } + } else if (k->defbeacon) { + if (k->idx < 6 || k->idx > 7) { + GENL_SET_ERR_MSG(info, + "defbeacon key idx not 6 or 7"); + return -EINVAL; + } } else if (k->def) { if (k->idx < 0 || k->idx > 3) { GENL_SET_ERR_MSG(info, "def key idx not 0-3"); return -EINVAL; } } else { - if (k->idx < 0 || k->idx > 5) { - GENL_SET_ERR_MSG(info, "key idx not 0-5"); + if (k->idx < 0 || k->idx > 7) { + GENL_SET_ERR_MSG(info, "key idx not 0-7"); return -EINVAL; } } @@ -3817,8 +3827,14 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) void *hdr; struct sk_buff *msg; - if (info->attrs[NL80211_ATTR_KEY_IDX]) + if (info->attrs[NL80211_ATTR_KEY_IDX]) { key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + if (key_idx > 5 && + !wiphy_ext_feature_isset( + &rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3894,7 +3910,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) /* Only support setting default key and * Extended Key ID action NL80211_KEY_SET_TX. 
*/ - if (!key.def && !key.defmgmt && + if (!key.def && !key.defmgmt && !key.defbeacon && !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; @@ -3941,6 +3957,24 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_CFG80211_WEXT dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; #endif + } else if (key.defbeacon) { + if (key.def_uni || !key.def_multi) { + err = -EINVAL; + goto out; + } + + if (!rdev->ops->set_default_beacon_key) { + err = -EOPNOTSUPP; + goto out; + } + + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto out; + + err = rdev_set_default_beacon_key(rdev, dev, key.idx); + if (err) + goto out; } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e0d34f796d0b..af7fcf2a3b4a 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -136,6 +136,19 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u8 key_index) +{ + int ret; + + trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, key_index); + ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, + key_index); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ap_settings *settings) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d32a2ec4d96a..ac3e60aa1fc8 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1111,9 +1111,16 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. 
*/ - if (rdev->ops->del_key) - for (i = 0; i < 6; i++) + if (rdev->ops->del_key) { + int max_key_idx = 5; + + if (wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + max_key_idx = 7; + for (i = 0; i <= max_key_idx; i++) rdev_del_key(rdev, dev, i, false, NULL); + } rdev_set_qos_map(rdev, dev, NULL); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 3ef1679b0e66..56b78222746c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -510,6 +510,23 @@ TRACE_EVENT(rdev_set_default_mgmt_key, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) ); +TRACE_EVENT(rdev_set_default_beacon_key, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index), + TP_ARGS(wiphy, netdev, key_index), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u8, key_index) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->key_index = key_index; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) +); + TRACE_EVENT(rdev_start_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_settings *settings), diff --git a/net/wireless/util.c b/net/wireless/util.c index 8481e9ac33da..72926f87c913 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -231,7 +231,12 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx < 0 || key_idx > 5) + int max_key_idx = 5; + + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + max_key_idx = 7; + if (key_idx < 0 || key_idx > max_key_idx) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) -- cgit v1.2.3 From e5473e80d46767ebc64dac4958f30299a3b14b1b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:44 +0200 Subject: mac80211: Support BIGTK configuration for Beacon protection Extend mac80211 key configuration to support the new BIGTK with key index values 6 and 7. Support for actually protecting Beacon frames (adding the MME in AP mode and checking it in STA mode) is covered in separate commits. 
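Internally the per-interface key array simply grows by two slots; the arithmetic below mirrors the bounds check added in __ieee80211_set_default_beacon_key() (constants as in mac80211: 4 data, 2 mgmt, 2 beacon key slots) and is shown only as an illustration.

  /* Illustration of the key array layout used by mac80211 after this patch. */
  #define NUM_DEFAULT_KEYS        4   /* indexes 0..3 */
  #define NUM_DEFAULT_MGMT_KEYS   2   /* indexes 4..5 */
  #define NUM_DEFAULT_BEACON_KEYS 2   /* indexes 6..7 */

  static int is_beacon_key_idx(int idx)
  {
      return idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS &&
             idx <  NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
                    NUM_DEFAULT_BEACON_KEYS;
  }
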
Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-3-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 15 ++++++++++++++- net/mac80211/debugfs_key.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/debugfs_key.h | 10 ++++++++++ net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/key.c | 40 ++++++++++++++++++++++++++++++++++++++-- net/mac80211/key.h | 3 +++ net/mac80211/sta_info.h | 4 +++- 7 files changed, 104 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a66eff1ee26a..a762ba6e4c5d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -568,7 +568,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, if (pairwise && key_idx < NUM_DEFAULT_KEYS) key = rcu_dereference(sta->ptk[key_idx]); else if (!pairwise && - key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) + key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS) key = rcu_dereference(sta->gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -680,6 +681,17 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, return 0; } +static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, + struct net_device *dev, + u8 key_idx) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_set_default_beacon_key(sdata, key_idx); + + return 0; +} + void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo) @@ -3885,6 +3897,7 @@ const struct cfg80211_ops mac80211_config_ops = { .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, .set_default_mgmt_key = ieee80211_config_default_mgmt_key, + .set_default_beacon_key = ieee80211_config_default_beacon_key, .start_ap = ieee80211_start_ap, .change_beacon = ieee80211_change_beacon, .stop_ap = ieee80211_stop_ap, diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 7b8735ced2a1..98a713475e0f 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -433,6 +433,37 @@ void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sda sdata->debugfs.default_mgmt_key = NULL; } +void +ieee80211_debugfs_key_add_beacon_default(struct ieee80211_sub_if_data *sdata) +{ + char buf[50]; + struct ieee80211_key *key; + + if (!sdata->vif.debugfs_dir) + return; + + key = key_mtx_dereference(sdata->local, + sdata->default_beacon_key); + if (key) { + sprintf(buf, "../keys/%d", key->debugfs.cnt); + sdata->debugfs.default_beacon_key = + debugfs_create_symlink("default_beacon_key", + sdata->vif.debugfs_dir, buf); + } else { + ieee80211_debugfs_key_remove_beacon_default(sdata); + } +} + +void +ieee80211_debugfs_key_remove_beacon_default(struct ieee80211_sub_if_data *sdata) +{ + if (!sdata) + return; + + debugfs_remove(sdata->debugfs.default_beacon_key); + sdata->debugfs.default_beacon_key = NULL; +} + void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta) { diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h index 1cd7b8bff56c..af7cf495f8d1 100644 --- a/net/mac80211/debugfs_key.h +++ b/net/mac80211/debugfs_key.h @@ -10,6 +10,10 @@ void ieee80211_debugfs_key_add_mgmt_default( struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_remove_mgmt_default( struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_key_add_beacon_default( + struct ieee80211_sub_if_data *sdata); +void 
ieee80211_debugfs_key_remove_beacon_default( + struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta); #else @@ -26,6 +30,12 @@ static inline void ieee80211_debugfs_key_add_mgmt_default( static inline void ieee80211_debugfs_key_remove_mgmt_default( struct ieee80211_sub_if_data *sdata) {} +static inline void ieee80211_debugfs_key_add_beacon_default( + struct ieee80211_sub_if_data *sdata) +{} +static inline void ieee80211_debugfs_key_remove_beacon_default( + struct ieee80211_sub_if_data *sdata) +{} static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta) {} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8a49d78ad7c9..de39f9ca9935 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -901,10 +901,13 @@ struct ieee80211_sub_if_data { /* bit field of ACM bits (BIT(802.1D tag)) */ u8 wmm_acm; - struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; + struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *default_unicast_key; struct ieee80211_key __rcu *default_multicast_key; struct ieee80211_key __rcu *default_mgmt_key; + struct ieee80211_key __rcu *default_beacon_key; u16 sequence_number; __be16 control_port_protocol; @@ -978,6 +981,7 @@ struct ieee80211_sub_if_data { struct dentry *default_unicast_key; struct dentry *default_multicast_key; struct dentry *default_mgmt_key; + struct dentry *default_beacon_key; } debugfs; #endif diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 54934eff4ac1..6354491c5a09 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -407,6 +407,31 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, mutex_unlock(&sdata->local->key_mtx); } +static void +__ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, int idx) +{ + struct ieee80211_key *key = NULL; + + assert_key_lock(sdata->local); + + if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS && + idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS) + key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + + rcu_assign_pointer(sdata->default_beacon_key, key); + + ieee80211_debugfs_key_update_default(sdata); +} + +void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, + int idx) +{ + mutex_lock(&sdata->local->key_mtx); + __ieee80211_set_default_beacon_key(sdata, idx); + mutex_unlock(&sdata->local->key_mtx); +} + static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, bool pairwise, @@ -415,7 +440,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, { int idx; int ret = 0; - bool defunikey, defmultikey, defmgmtkey; + bool defunikey, defmultikey, defmgmtkey, defbeaconkey; /* caller must provide at least one old/new */ if (WARN_ON(!new && !old)) @@ -480,6 +505,9 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, defmgmtkey = old && old == key_mtx_dereference(sdata->local, sdata->default_mgmt_key); + defbeaconkey = old && + old == key_mtx_dereference(sdata->local, + sdata->default_beacon_key); if (defunikey && !new) __ieee80211_set_default_key(sdata, -1, true, false); @@ -487,6 +515,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, __ieee80211_set_default_key(sdata, -1, false, true); if (defmgmtkey && !new) __ieee80211_set_default_mgmt_key(sdata, -1); + if (defbeaconkey 
&& !new) + __ieee80211_set_default_beacon_key(sdata, -1); rcu_assign_pointer(sdata->keys[idx], new); if (defunikey && new) @@ -498,6 +528,9 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (defmgmtkey && new) __ieee80211_set_default_mgmt_key(sdata, new->conf.keyidx); + if (defbeaconkey && new) + __ieee80211_set_default_beacon_key(sdata, + new->conf.keyidx); } if (old) @@ -515,7 +548,9 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, struct ieee80211_key *key; int i, j, err; - if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)) + if (WARN_ON(idx < 0 || + idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS)) return ERR_PTR(-EINVAL); key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); @@ -978,6 +1013,7 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, sdata->crypto_tx_tailroom_pending_dec = 0; ieee80211_debugfs_key_remove_mgmt_default(sdata); + ieee80211_debugfs_key_remove_beacon_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { ieee80211_key_replace(key->sdata, key->sta, diff --git a/net/mac80211/key.h b/net/mac80211/key.h index d6d6e89cf7dd..7ad72e9b4991 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -17,6 +17,7 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 +#define NUM_DEFAULT_BEACON_KEYS 2 #define INVALID_PTK_KEYIDX 2 /* Keyidx always pointing to a NULL key for PTK */ struct ieee80211_local; @@ -153,6 +154,8 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, + int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c00e28585f9d..364a35414d05 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -533,7 +533,9 @@ struct sta_info { u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; + struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; u8 ptk_idx; struct rate_control_ref *rate_ctrl; -- cgit v1.2.3 From 2d5d4b0a6da1271a7dfa9a7052870361e72ba424 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:45 +0200 Subject: mac80211: Update BIP to support Beacon frames When BIP is used to protect Beacon frames, the Timestamp field is masked to zero. Otherwise, the BIP processing is identical to the way it was already used with group-addressed Robust Management frames. 
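As a reminder of what those 8 bytes are, the Beacon frame body starts with the fixed fields sketched below (an illustrative struct based on the IEEE 802.11 frame format, not taken from the patch); the CMAC/GMAC changes that follow substitute zeros for the Timestamp before computing the MIC.

/* Illustrative layout of the start of a Beacon frame body; the 8-byte
 * Timestamp is the part that BIP now hashes as all-zero.
 */
struct beacon_fixed_part {
	__le64 timestamp;	/* masked to zero for BIP over Beacons */
	__le16 beacon_int;
	__le16 capab_info;
	/* variable elements (SSID, RSNE, MME, ...) follow */
} __packed;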
Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-4-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/aes_cmac.c | 21 +++++++++++++++++++-- net/mac80211/aes_gmac.c | 24 ++++++++++++++++++------ 2 files changed, 37 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 57748cab0e28..b31f1021ad9c 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -26,12 +26,20 @@ void ieee80211_aes_cmac(struct crypto_shash *tfm, const u8 *aad, { SHASH_DESC_ON_STACK(desc, tfm); u8 out[AES_BLOCK_SIZE]; + const __le16 *fc; desc->tfm = tfm; crypto_shash_init(desc); crypto_shash_update(desc, aad, AAD_LEN); - crypto_shash_update(desc, data, data_len - CMAC_TLEN); + fc = (const __le16 *)aad; + if (ieee80211_is_beacon(*fc)) { + /* mask Timestamp field to zero */ + crypto_shash_update(desc, zero, 8); + crypto_shash_update(desc, data + 8, data_len - 8 - CMAC_TLEN); + } else { + crypto_shash_update(desc, data, data_len - CMAC_TLEN); + } crypto_shash_finup(desc, zero, CMAC_TLEN, out); memcpy(mic, out, CMAC_TLEN); @@ -41,12 +49,21 @@ void ieee80211_aes_cmac_256(struct crypto_shash *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic) { SHASH_DESC_ON_STACK(desc, tfm); + const __le16 *fc; desc->tfm = tfm; crypto_shash_init(desc); crypto_shash_update(desc, aad, AAD_LEN); - crypto_shash_update(desc, data, data_len - CMAC_TLEN_256); + fc = (const __le16 *)aad; + if (ieee80211_is_beacon(*fc)) { + /* mask Timestamp field to zero */ + crypto_shash_update(desc, zero, 8); + crypto_shash_update(desc, data + 8, + data_len - 8 - CMAC_TLEN_256); + } else { + crypto_shash_update(desc, data, data_len - CMAC_TLEN_256); + } crypto_shash_finup(desc, zero, CMAC_TLEN_256, mic); } diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index 363ad1c1dc0c..16ba09cb5def 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -17,10 +17,11 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, const u8 *data, size_t data_len, u8 *mic) { - struct scatterlist sg[4]; + struct scatterlist sg[5]; u8 *zero, *__aad, iv[AES_BLOCK_SIZE]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + const __le16 *fc; if (data_len < GMAC_MIC_LEN) return -EINVAL; @@ -33,11 +34,22 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, __aad = zero + GMAC_MIC_LEN; memcpy(__aad, aad, GMAC_AAD_LEN); - sg_init_table(sg, 4); - sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); - sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); - sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); - sg_set_buf(&sg[3], mic, GMAC_MIC_LEN); + fc = (const __le16 *)aad; + if (ieee80211_is_beacon(*fc)) { + /* mask Timestamp field to zero */ + sg_init_table(sg, 5); + sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); + sg_set_buf(&sg[1], zero, 8); + sg_set_buf(&sg[2], data + 8, data_len - 8 - GMAC_MIC_LEN); + sg_set_buf(&sg[3], zero, GMAC_MIC_LEN); + sg_set_buf(&sg[4], mic, GMAC_MIC_LEN); + } else { + sg_init_table(sg, 4); + sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); + sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); + sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); + sg_set_buf(&sg[3], mic, GMAC_MIC_LEN); + } memcpy(iv, nonce, GMAC_NONCE_LEN); memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); -- cgit v1.2.3 From 0a3a84360b376e474f8cc0b6d03b7fcf2dd5c592 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:46 +0200 Subject: mac80211: Beacon protection using the 
new BIGTK (AP) This adds support for mac80211 to add an MME into Beacon frames in AP mode when a BIGTK is configured. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-5-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cddaacaa31a3..83147385c200 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4664,6 +4664,28 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_is_complete); +static int ieee80211_beacon_protect(struct sk_buff *skb, + struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + ieee80211_tx_result res; + struct ieee80211_tx_data tx; + + memset(&tx, 0, sizeof(tx)); + tx.key = rcu_dereference(sdata->default_beacon_key); + if (!tx.key) + return 0; + tx.local = local; + tx.sdata = sdata; + __skb_queue_head_init(&tx.skbs); + __skb_queue_tail(&tx.skbs, skb); + res = ieee80211_tx_h_encrypt(&tx); + if (WARN_ON_ONCE(res != TX_CONTINUE)) + return -1; + + return 0; +} + static struct sk_buff * __ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -4731,6 +4753,9 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->tail) skb_put_data(skb, beacon->tail, beacon->tail_len); + + if (ieee80211_beacon_protect(skb, local, sdata) < 0) + goto out; } else goto out; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { -- cgit v1.2.3 From af2d14b01c32d7cba65f73503586e5b621afb139 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:47 +0200 Subject: mac80211: Beacon protection using the new BIGTK (STA) This adds support for mac80211 to verify that received Beacon frames have a valid MME in station mode when a BIGTK is configured. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-6-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ec3a04a1db20..6bd24123456d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -983,7 +983,8 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; - if (!ieee80211_is_robust_mgmt_frame(skb)) + if (!ieee80211_is_robust_mgmt_frame(skb) && + !ieee80211_is_beacon(hdr->frame_control)) return -1; /* not a robust management frame */ mmie = (struct ieee80211_mmie *) @@ -1868,6 +1869,41 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) return RX_CONTINUE; } /* ieee80211_rx_h_sta_process */ +static struct ieee80211_key * +ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx) +{ + struct ieee80211_key *key = NULL; + struct ieee80211_sub_if_data *sdata = rx->sdata; + int idx2; + + /* Make sure key gets set if either BIGTK key index is set so that + * ieee80211_drop_unencrypted_mgmt() can properly drop both unprotected + * Beacon frames and Beacon frames that claim to use another BIGTK key + * index (i.e., a key that we do not have). 
+ */ + + if (idx < 0) { + idx = NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS; + idx2 = idx + 1; + } else { + if (idx == NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) + idx2 = idx + 1; + else + idx2 = idx - 1; + } + + if (rx->sta) + key = rcu_dereference(rx->sta->gtk[idx]); + if (!key) + key = rcu_dereference(sdata->keys[idx]); + if (!key && rx->sta) + key = rcu_dereference(rx->sta->gtk[idx2]); + if (!key) + key = rcu_dereference(sdata->keys[idx2]); + + return key; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { @@ -1885,17 +1921,18 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* * Key selection 101 * - * There are four types of keys: + * There are five types of keys: * - GTK (group keys) * - IGTK (group keys for management frames) + * - BIGTK (group keys for Beacon frames) * - PTK (pairwise keys) * - STK (station-to-station pairwise keys) * * When selecting a key, we have to distinguish between multicast * (including broadcast) and unicast frames, the latter can only - * use PTKs and STKs while the former always use GTKs and IGTKs. - * Unless, of course, actual WEP keys ("pre-RSNA") are used, then - * unicast frames can also use key indices like GTKs. Hence, if we + * use PTKs and STKs while the former always use GTKs, IGTKs, and + * BIGTKs. Unless, of course, actual WEP keys ("pre-RSNA") are used, + * then unicast frames can also use key indices like GTKs. Hence, if we * don't have a PTK/STK we check the key index for a WEP key. * * Note that in a regular BSS, multicast frames are sent by the @@ -1939,6 +1976,20 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* Skip decryption if the frame is not protected. */ if (!ieee80211_has_protected(fc)) return RX_CONTINUE; + } else if (mmie_keyidx >= 0 && ieee80211_is_beacon(fc)) { + /* Broadcast/multicast robust management frame / BIP */ + if ((status->flag & RX_FLAG_DECRYPTED) && + (status->flag & RX_FLAG_IV_STRIPPED)) + return RX_CONTINUE; + + if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS || + mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS) + return RX_DROP_MONITOR; /* unexpected BIP keyidx */ + + rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx); + if (!rx->key) + return RX_CONTINUE; /* Beacon protection not in use */ } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ if ((status->flag & RX_FLAG_DECRYPTED) && @@ -1968,11 +2019,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; int i; - if (ieee80211_is_mgmt(fc) && - is_multicast_ether_addr(hdr->addr1) && - (key = rcu_dereference(rx->sdata->default_mgmt_key))) - rx->key = key; - else { + if (ieee80211_is_beacon(fc)) { + key = ieee80211_rx_get_bigtk(rx, -1); + } else if (ieee80211_is_mgmt(fc) && + is_multicast_ether_addr(hdr->addr1)) { + key = rcu_dereference(rx->sdata->default_mgmt_key); + } else { if (rx->sta) { for (i = 0; i < NUM_DEFAULT_KEYS; i++) { key = rcu_dereference(rx->sta->gtk[i]); @@ -1987,9 +2039,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) break; } } - if (key) - rx->key = key; } + if (key) + rx->key = key; return RX_CONTINUE; } else { /* @@ -2358,6 +2410,9 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) rx->skb->len); return -EACCES; } + if (unlikely(ieee80211_is_beacon(fc) && rx->key && + ieee80211_get_mmie_keyidx(rx->skb) < 0)) + return -EACCES; /* * When using MFP, Action frames are not allowed prior to * having configured keys. 
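For completeness, a full-MAC driver (one that implements cfg80211_ops directly rather than going through mac80211) would opt in to Beacon protection roughly as sketched below; the mydrv_* names are hypothetical and the callback body only marks where the BIGTK selection would be handed to firmware.

/* Hypothetical full-MAC driver sketch: advertise the feature and accept
 * the default BIGTK selection (key index 6 or 7) from cfg80211.
 */
static int mydrv_set_default_beacon_key(struct wiphy *wiphy,
					struct net_device *dev,
					u8 key_index)
{
	/* forward the BIGTK key_index (6 or 7) to the firmware here */
	return 0;
}

static const struct cfg80211_ops mydrv_cfg_ops = {
	/* ... other callbacks ... */
	.set_default_beacon_key = mydrv_set_default_beacon_key,
};

static void mydrv_wiphy_init(struct wiphy *wiphy)
{
	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION);
}

Without the callback, nl80211_set_key() rejects a default-beacon-key request with -EOPNOTSUPP, and without the feature flag key indexes 6 and 7 are refused by cfg80211_validate_key_settings(), as the hunks above show.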
-- cgit v1.2.3 From 77f576deaa393b54a0f2ca8ab1ab5b2d3c6b971b Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:22 +0530 Subject: nl80211: Add NL command to support TID speicific configurations Add the new NL80211_CMD_SET_TID_CONFIG command to support data TID specific configuration. Per TID configuration is passed in the nested NL80211_ATTR_TID_CONFIG attribute. This patch adds support to configure per TID noack policy through the NL80211_TID_CONFIG_ATTR_NOACK attribute. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-2-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 40 ++++++++++++ include/uapi/linux/nl80211.h | 71 +++++++++++++++++++++ net/wireless/nl80211.c | 148 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 24 +++++++ net/wireless/trace.h | 37 +++++++++++ 5 files changed, 320 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 04b6df15706c..65a8bf73d21e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,6 +626,38 @@ struct cfg80211_chan_def { struct ieee80211_edmg edmg; }; +enum ieee80211_tid_conf_mask { + IEEE80211_TID_CONF_NOACK = BIT(NL80211_TID_CONFIG_ATTR_NOACK), +}; + +/** + * struct ieee80211_tid_cfg - TID specific configuration + * @config_override: Flag to notify driver to reset TID configuration + * of the peer. + * @tid: TID number + * @tid_conf_mask: bitmap indicating which parameter changed + * see &enum ieee80211_tid_conf_mask + * @noack: noack configuration value for the TID + */ +struct ieee80211_tid_cfg { + bool config_override; + u8 tid; + u32 tid_conf_mask; + enum nl80211_tid_config noack; +}; + +/** + * struct ieee80211_tid_config - TID configuration + * @peer: Station's MAC address + * @n_tid_conf: Number of TID specific configurations to be applied + * @tid_conf: Configuration change info + */ +struct ieee80211_tid_config { + const u8 *peer; + u32 n_tid_conf; + struct ieee80211_tid_cfg tid_conf[]; +}; + /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition @@ -3671,6 +3703,10 @@ struct cfg80211_update_owe_info { * * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame * and overrule HWMP path selection algorithm. + * @set_tid_config: TID specific configuration, this can be peer or BSS specific + * This callback may sleep. + * @reset_tid_config: Reset TID specific configuration for the peer. + * This callback may sleep. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3994,6 +4030,10 @@ struct cfg80211_ops { struct cfg80211_update_owe_info *owe_info); int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); + int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_tid_config *tid_conf); + int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 tid); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 934e62fe8705..0b12fcffb518 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -264,6 +264,27 @@ * %NL80211_ATTR_VLAN_ID. */ +/** + * DOC: TID configuration + * + * TID configuration support can be advertised by drivers by setting + * @NL80211_EXT_FEATURE_PER_TID_* and/or @NL80211_EXT_FEATURE_PER_STA_* config + * mentioned in &enum nl80211_tid_config_attr. 
+ * Needed configuration parameters are mentioned in + * &enum nl80211_tid_config_attr and it will be passed using + * %NL80211_CMD_SET_TID_CONFIG through %NL80211_ATTR_TID_CONFIG. + * If the configuration needs to be applied for specific peer then MAC address + * of the peer needs to be passed in %NL80211_ATT_MAC, otherwise the + * configuration will be applied for all the connected peers in the vif except + * the peer which has peer specific configuration for the TID. + * And the peer specific configuration will be overridden if + * %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set. + * All this configurations are valid only for STA's current connection + * i.e. the configurations will be reset to default when the STA connects back + * after disconnection/roaming, and this configuration will be cleared when + * the interface goes down. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1125,6 +1146,9 @@ * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame * content. The frame is ethernet data. * + * @NL80211_CMD_SET_TID_CONFIG: Data frame TID specific configuration + * is passed using %NL80211_ATTR_TID_CONFIG attribute. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1349,6 +1373,8 @@ enum nl80211_commands { NL80211_CMD_PROBE_MESH_LINK, + NL80211_CMD_SET_TID_CONFIG, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2409,6 +2435,9 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * + * @NL80211_ATTR_TID_CONFIG: TID specific configuration in a + * nested attribute with &enum nl80211_tid_config_attr sub-attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2877,6 +2906,8 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, + NL80211_ATTR_TID_CONFIG, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4722,6 +4753,40 @@ enum nl80211_tx_power_setting { NL80211_TX_POWER_FIXED, }; +/** + * enum nl80211_tid_config - TID config state + * @NL80211_TID_CONFIG_ENABLE: Enable config for the TID + * @NL80211_TID_CONFIG_DISABLE: Disable config for the TID + */ +enum nl80211_tid_config { + NL80211_TID_CONFIG_ENABLE, + NL80211_TID_CONFIG_DISABLE, +}; + +/* enum nl80211_tid_config_attr - TID specific configuration. + * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer + * is selected, if set indicates that the new configuration overrides + * all previous peer configurations, otherwise previous peer specific + * configurations should be left untouched. If peer is selected then + * it will reset particular TID configuration of that peer and it will + * not accept other TID config attributes along with peer. + * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs(bit 0 to 7) + * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. + * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. + * Its type is u8. 
+ */ +enum nl80211_tid_config_attr { + __NL80211_TID_CONFIG_ATTR_INVALID, + NL80211_TID_CONFIG_ATTR_OVERRIDE, + NL80211_TID_CONFIG_ATTR_TIDS, + NL80211_TID_CONFIG_ATTR_NOACK, + + /* keep last */ + __NL80211_TID_CONFIG_ATTR_AFTER_LAST, + NL80211_TID_CONFIG_ATTR_MAX = __NL80211_TID_CONFIG_ATTR_AFTER_LAST - 1 +}; + /** * enum nl80211_packet_pattern_attr - packet pattern attribute * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute @@ -5540,6 +5605,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. + * @NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG: Driver supports per TID NoAck + * policy functionality. + * @NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG: Driver supports STA specific NoAck + * policy functionality. * * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. @@ -5592,6 +5661,8 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, NL80211_EXT_FEATURE_BEACON_PROTECTION, + NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG, + NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a75f72288139..a0839fae6555 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -328,6 +328,14 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, }; +static const struct nla_policy +nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { + [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = NLA_FLAG }, + [NL80211_TID_CONFIG_ATTR_TIDS] = { .type = NLA_U8 }, + [NL80211_TID_CONFIG_ATTR_NOACK] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -634,6 +642,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), + [NL80211_ATTR_TID_CONFIG] = + NLA_POLICY_NESTED_ARRAY(nl80211_tid_config_attr_policy), }; /* policy for the key attributes */ @@ -13934,6 +13944,137 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } +static int +__nl80211_check_tid_conf_support(struct cfg80211_registered_device *rdev, + struct netlink_ext_ack *extack, + const u8 *peer, struct nlattr *attrs[], + struct ieee80211_tid_cfg *tid_conf, + enum nl80211_tid_config_attr attr, + enum nl80211_ext_feature_index per_tid_config, + enum nl80211_ext_feature_index per_sta_config) +{ + if (!wiphy_ext_feature_isset(&rdev->wiphy, per_tid_config)) { + NL_SET_ERR_MSG_ATTR(extack, attrs[attr], + "TID specific configuration not supported"); + return -ENOTSUPP; + } + + if (peer && !wiphy_ext_feature_isset(&rdev->wiphy, per_sta_config)) { + NL_SET_ERR_MSG_ATTR(extack, attrs[attr], + "peer specific TID configuration not supported"); + return -ENOTSUPP; + } + + tid_conf->tid_conf_mask |= BIT(attr); + return 0; +} + +#define 
nl80211_check_tid_config_support(rdev, extack, peer, attrs, tid_conf, \ + conf) \ + __nl80211_check_tid_conf_support(rdev, extack, peer, attrs, tid_conf, \ + NL80211_TID_CONFIG_ATTR_##conf, \ + NL80211_EXT_FEATURE_PER_TID_##conf##_CONFIG, \ + NL80211_EXT_FEATURE_PER_STA_##conf##_CONFIG) + +static int parse_tid_conf(struct cfg80211_registered_device *rdev, + struct nlattr *attrs[], struct net_device *dev, + struct ieee80211_tid_cfg *tid_conf, + struct genl_info *info, const u8 *peer) +{ + struct netlink_ext_ack *extack = info->extack; + int err; + + if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS]) + return -EINVAL; + + tid_conf->config_override = + nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]); + tid_conf->tid = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); + + if (tid_conf->config_override) { + if (rdev->ops->reset_tid_config) { + err = rdev_reset_tid_config(rdev, dev, peer, + tid_conf->tid); + /* If peer is there no other configuration will be + * allowed + */ + if (err || peer) + return err; + } else { + return -EINVAL; + } + } + + if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) { + err = nl80211_check_tid_config_support(rdev, extack, peer, + attrs, tid_conf, + NOACK); + if (err) + return err; + + tid_conf->noack = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); + } + + return 0; +} + +static int nl80211_set_tid_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; + struct net_device *dev = info->user_ptr[1]; + struct ieee80211_tid_config *tid_config; + struct nlattr *tid; + int conf_idx = 0, rem_conf; + int ret = -EINVAL; + u32 num_conf = 0; + + if (!info->attrs[NL80211_ATTR_TID_CONFIG]) + return -EINVAL; + + if (!rdev->ops->set_tid_config) + return -EOPNOTSUPP; + + nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], + rem_conf) + num_conf++; + + tid_config = kzalloc(struct_size(tid_config, tid_conf, num_conf), + GFP_KERNEL); + if (!tid_config) + return -ENOMEM; + + tid_config->n_tid_conf = num_conf; + + if (info->attrs[NL80211_ATTR_MAC]) + tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], + rem_conf) { + ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX, + tid, NULL, NULL); + + if (ret) + goto bad_tid_conf; + + ret = parse_tid_conf(rdev, attrs, dev, + &tid_config->tid_conf[conf_idx], + info, tid_config->peer); + if (ret) + goto bad_tid_conf; + + conf_idx++; + } + + ret = rdev_set_tid_config(rdev, dev, tid_config); + +bad_tid_conf: + kfree(tid_config); + return ret; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -14888,6 +15029,13 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_TID_CONFIG, + .doit = nl80211_set_tid_config, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index af7fcf2a3b4a..a754e0496b6c 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1326,4 +1326,28 @@ rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_tid_config *tid_conf) +{ 
+ int ret; + + trace_rdev_set_tid_config(&rdev->wiphy, dev, tid_conf); + ret = rdev->ops->set_tid_config(&rdev->wiphy, dev, tid_conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *peer, + u8 tid) +{ + int ret; + + trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tid); + ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tid); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 56b78222746c..167b18896cd6 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3480,6 +3480,43 @@ TRACE_EVENT(rdev_probe_mesh_link, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest)) ); +TRACE_EVENT(rdev_set_tid_config, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct ieee80211_tid_config *tid_conf), + TP_ARGS(wiphy, netdev, tid_conf), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, tid_conf->peer); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) +); + +TRACE_EVENT(rdev_reset_tid_config, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *peer, u8 tid), + TP_ARGS(wiphy, netdev, peer, tid), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tid) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tid = tid; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tid: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tid) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 3710a8a6284f58a78ba4fe9c4b6672207636a223 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 11:34:25 +0100 Subject: nl80211: modify TID-config API Make some changes to the TID-config API: * use u16 in nl80211 (only, and restrict to using 8 bits for now), to avoid issues in the future if we ever want to use higher TIDs. * reject empty TIDs mask (via netlink policy) * change feature advertising to not use extended feature flags but have own mechanism for this, which simplifies the code * fix all variable names from 'tid' to 'tids' since it's a mask * change to cfg80211_ name prefixes, not ieee80211_ * fix some minor docs/spelling things. Change-Id: Ia234d464b3f914cdeab82f540e018855be580dce Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 43 +++++++++++-------- include/uapi/linux/nl80211.h | 51 +++++++++++++---------- net/wireless/nl80211.c | 99 +++++++++++++++++++++++++------------------- net/wireless/rdev-ops.h | 8 ++-- net/wireless/trace.h | 14 +++---- 5 files changed, 121 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 65a8bf73d21e..bbe4acef729d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,36 +626,32 @@ struct cfg80211_chan_def { struct ieee80211_edmg edmg; }; -enum ieee80211_tid_conf_mask { - IEEE80211_TID_CONF_NOACK = BIT(NL80211_TID_CONFIG_ATTR_NOACK), -}; - /** - * struct ieee80211_tid_cfg - TID specific configuration + * struct cfg80211_tid_cfg - TID specific configuration * @config_override: Flag to notify driver to reset TID configuration * of the peer. 
- * @tid: TID number - * @tid_conf_mask: bitmap indicating which parameter changed - * see &enum ieee80211_tid_conf_mask + * @tids: bitmap of TIDs to modify + * @mask: bitmap of attributes indicating which parameter changed, + * similar to &nl80211_tid_config_supp. * @noack: noack configuration value for the TID */ -struct ieee80211_tid_cfg { +struct cfg80211_tid_cfg { bool config_override; - u8 tid; - u32 tid_conf_mask; + u8 tids; + u32 mask; enum nl80211_tid_config noack; }; /** - * struct ieee80211_tid_config - TID configuration + * struct cfg80211_tid_config - TID configuration * @peer: Station's MAC address * @n_tid_conf: Number of TID specific configurations to be applied * @tid_conf: Configuration change info */ -struct ieee80211_tid_config { +struct cfg80211_tid_config { const u8 *peer; u32 n_tid_conf; - struct ieee80211_tid_cfg tid_conf[]; + struct cfg80211_tid_cfg tid_conf[]; }; /** @@ -3705,8 +3701,8 @@ struct cfg80211_update_owe_info { * and overrule HWMP path selection algorithm. * @set_tid_config: TID specific configuration, this can be peer or BSS specific * This callback may sleep. - * @reset_tid_config: Reset TID specific configuration for the peer. - * This callback may sleep. + * @reset_tid_config: Reset TID specific configuration for the peer, for the + * given TIDs. This callback may sleep. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4031,9 +4027,9 @@ struct cfg80211_ops { int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_tid_config *tid_conf); + struct cfg80211_tid_config *tid_conf); int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 tid); + const u8 *peer, u8 tids); }; /* @@ -4641,6 +4637,13 @@ struct wiphy_iftype_akm_suites { * @support_mbssid must be set for this to have any effect. * * @pmsr_capa: peer measurement capabilities + * + * @tid_config_support: describes the per-TID config support that the + * device has + * @tid_config_support.vif: bitmap of attributes (configurations) + * supported by the driver for each vif + * @tid_config_support.peer: bitmap of attributes (configurations) + * supported by the driver for each peer */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4772,6 +4775,10 @@ struct wiphy { const struct cfg80211_pmsr_capabilities *pmsr_capa; + struct { + u64 peer, vif; + } tid_config_support; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0b12fcffb518..591d843eda72 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -267,20 +267,22 @@ /** * DOC: TID configuration * - * TID configuration support can be advertised by drivers by setting - * @NL80211_EXT_FEATURE_PER_TID_* and/or @NL80211_EXT_FEATURE_PER_STA_* config - * mentioned in &enum nl80211_tid_config_attr. - * Needed configuration parameters are mentioned in - * &enum nl80211_tid_config_attr and it will be passed using - * %NL80211_CMD_SET_TID_CONFIG through %NL80211_ATTR_TID_CONFIG. - * If the configuration needs to be applied for specific peer then MAC address - * of the peer needs to be passed in %NL80211_ATT_MAC, otherwise the + * TID config support can be checked in the %NL80211_ATTR_TID_CONFIG + * attribute given in wiphy capabilities. 
+ * + * The necessary configuration parameters are mentioned in + * &enum nl80211_tid_config_attr and it will be passed to the + * %NL80211_CMD_SET_TID_CONFIG command in %NL80211_ATTR_TID_CONFIG. + * + * If the configuration needs to be applied for specific peer then the MAC + * address of the peer needs to be passed in %NL80211_ATTR_MAC, otherwise the * configuration will be applied for all the connected peers in the vif except - * the peer which has peer specific configuration for the TID. - * And the peer specific configuration will be overridden if - * %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set. - * All this configurations are valid only for STA's current connection - * i.e. the configurations will be reset to default when the STA connects back + * any peers that have peer specific configuration for the TID by default; if + * the %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set, peer specific values + * will be overwritten. + * + * All this configuration is valid only for STA's current connection + * i.e. the configuration will be reset to default when the STA connects back * after disconnection/roaming, and this configuration will be cleared when * the interface goes down. */ @@ -2436,7 +2438,9 @@ enum nl80211_commands { * advertised for a specific interface type. * * @NL80211_ATTR_TID_CONFIG: TID specific configuration in a - * nested attribute with &enum nl80211_tid_config_attr sub-attributes. + * nested attribute with &enum nl80211_tid_config_attr sub-attributes; + * on output (in wiphy attributes) it contains only the feature sub- + * attributes. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -4764,20 +4768,29 @@ enum nl80211_tid_config { }; /* enum nl80211_tid_config_attr - TID specific configuration. + * @NL80211_TID_CONFIG_ATTR_PAD: pad attribute for 64-bit values + * @NL80211_TID_CONFIG_ATTR_VIF_SUPP: a bitmap (u64) of attributes supported + * for per-vif configuration; doesn't list the ones that are generic + * (%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE). + * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but + * per peer instead. * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer * is selected, if set indicates that the new configuration overrides * all previous peer configurations, otherwise previous peer specific * configurations should be left untouched. If peer is selected then * it will reset particular TID configuration of that peer and it will * not accept other TID config attributes along with peer. - * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs(bit 0 to 7) - * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs (bit 0 to 7) + * Its type is u16. * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. * Its type is u8. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, + NL80211_TID_CONFIG_ATTR_PAD, + NL80211_TID_CONFIG_ATTR_VIF_SUPP, + NL80211_TID_CONFIG_ATTR_PEER_SUPP, NL80211_TID_CONFIG_ATTR_OVERRIDE, NL80211_TID_CONFIG_ATTR_TIDS, NL80211_TID_CONFIG_ATTR_NOACK, @@ -5605,10 +5618,6 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. 
- * @NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG: Driver supports per TID NoAck - * policy functionality. - * @NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG: Driver supports STA specific NoAck - * policy functionality. * * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. @@ -5661,8 +5670,6 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, NL80211_EXT_FEATURE_BEACON_PROTECTION, - NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG, - NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a0839fae6555..56ac851ccee1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -330,8 +330,10 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { static const struct nla_policy nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { + [NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 }, + [NL80211_TID_CONFIG_ATTR_PEER_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = NLA_FLAG }, - [NL80211_TID_CONFIG_ATTR_TIDS] = { .type = NLA_U8 }, + [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; @@ -1957,6 +1959,40 @@ nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, return 0; } +static int +nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + struct nlattr *supp; + + if (!rdev->wiphy.tid_config_support.vif && + !rdev->wiphy.tid_config_support.peer) + return 0; + + supp = nla_nest_start(msg, NL80211_ATTR_TID_CONFIG); + if (!supp) + return -ENOSPC; + + if (rdev->wiphy.tid_config_support.vif && + nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_VIF_SUPP, + rdev->wiphy.tid_config_support.vif, + NL80211_TID_CONFIG_ATTR_PAD)) + goto fail; + + if (rdev->wiphy.tid_config_support.peer && + nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_PEER_SUPP, + rdev->wiphy.tid_config_support.peer, + NL80211_TID_CONFIG_ATTR_PAD)) + goto fail; + + nla_nest_end(msg, supp); + + return 0; +fail: + nla_nest_cancel(msg, supp); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2518,6 +2554,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_put_iftype_akm_suites(rdev, msg)) goto nla_put_failure; + if (nl80211_put_tid_config_support(rdev, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -13944,44 +13983,13 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } -static int -__nl80211_check_tid_conf_support(struct cfg80211_registered_device *rdev, - struct netlink_ext_ack *extack, - const u8 *peer, struct nlattr *attrs[], - struct ieee80211_tid_cfg *tid_conf, - enum nl80211_tid_config_attr attr, - enum nl80211_ext_feature_index per_tid_config, - enum nl80211_ext_feature_index per_sta_config) -{ - if (!wiphy_ext_feature_isset(&rdev->wiphy, per_tid_config)) { - NL_SET_ERR_MSG_ATTR(extack, attrs[attr], - "TID specific configuration not supported"); - return -ENOTSUPP; - } - - if (peer && !wiphy_ext_feature_isset(&rdev->wiphy, per_sta_config)) { - NL_SET_ERR_MSG_ATTR(extack, attrs[attr], - "peer specific TID configuration not supported"); - return -ENOTSUPP; - } - - 
tid_conf->tid_conf_mask |= BIT(attr); - return 0; -} - -#define nl80211_check_tid_config_support(rdev, extack, peer, attrs, tid_conf, \ - conf) \ - __nl80211_check_tid_conf_support(rdev, extack, peer, attrs, tid_conf, \ - NL80211_TID_CONFIG_ATTR_##conf, \ - NL80211_EXT_FEATURE_PER_TID_##conf##_CONFIG, \ - NL80211_EXT_FEATURE_PER_STA_##conf##_CONFIG) - static int parse_tid_conf(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct net_device *dev, - struct ieee80211_tid_cfg *tid_conf, + struct cfg80211_tid_cfg *tid_conf, struct genl_info *info, const u8 *peer) { struct netlink_ext_ack *extack = info->extack; + u64 mask; int err; if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS]) @@ -13989,12 +13997,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, tid_conf->config_override = nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]); - tid_conf->tid = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); + tid_conf->tids = nla_get_u16(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); if (tid_conf->config_override) { if (rdev->ops->reset_tid_config) { err = rdev_reset_tid_config(rdev, dev, peer, - tid_conf->tid); + tid_conf->tids); /* If peer is there no other configuration will be * allowed */ @@ -14006,16 +14014,21 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, } if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) { - err = nl80211_check_tid_config_support(rdev, extack, peer, - attrs, tid_conf, - NOACK); - if (err) - return err; - + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_NOACK); tid_conf->noack = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); } + if (peer) + mask = rdev->wiphy.tid_config_support.peer; + else + mask = rdev->wiphy.tid_config_support.vif; + + if (tid_conf->mask & ~mask) { + NL_SET_ERR_MSG(extack, "unsupported TID configuration"); + return -ENOTSUPP; + } + return 0; } @@ -14025,7 +14038,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; struct net_device *dev = info->user_ptr[1]; - struct ieee80211_tid_config *tid_config; + struct cfg80211_tid_config *tid_config; struct nlattr *tid; int conf_idx = 0, rem_conf; int ret = -EINVAL; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index a754e0496b6c..99462f0c4e08 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1328,7 +1328,7 @@ rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_tid_config *tid_conf) + struct cfg80211_tid_config *tid_conf) { int ret; @@ -1340,12 +1340,12 @@ static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *peer, - u8 tid) + u8 tids) { int ret; - trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tid); - ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tid); + trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tids); + ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tids); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 167b18896cd6..839df54cee21 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3482,7 +3482,7 @@ TRACE_EVENT(rdev_probe_mesh_link, TRACE_EVENT(rdev_set_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct 
ieee80211_tid_config *tid_conf), + struct cfg80211_tid_config *tid_conf), TP_ARGS(wiphy, netdev, tid_conf), TP_STRUCT__entry( WIPHY_ENTRY @@ -3500,22 +3500,22 @@ TRACE_EVENT(rdev_set_tid_config, TRACE_EVENT(rdev_reset_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *peer, u8 tid), - TP_ARGS(wiphy, netdev, peer, tid), + const u8 *peer, u8 tids), + TP_ARGS(wiphy, netdev, peer, tids), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) - __field(u8, tid) + __field(u8, tids) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); - __entry->tid = tid; + __entry->tids = tids; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tid: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tid) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids) ); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ -- cgit v1.2.3 From 6a21d16c4db08398c737e0ffd03e4eca7131ac78 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:23 +0530 Subject: nl80211: Add support to configure TID specific retry configuration This patch adds support to configure per TID retry configuration through the NL80211_TID_CONFIG_ATTR_RETRY_SHORT and NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes. This TID specific retry configuration will have more precedence than phy level configuration. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-3-git-send-email-tamizhr@codeaurora.org [rebase completely on top of my previous API changes] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 12 ++++++++++++ net/wireless/nl80211.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bbe4acef729d..98981d1a026b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -634,12 +634,15 @@ struct cfg80211_chan_def { * @mask: bitmap of attributes indicating which parameter changed, * similar to &nl80211_tid_config_supp. * @noack: noack configuration value for the TID + * @retry_long: retry count value + * @retry_short: retry count value */ struct cfg80211_tid_cfg { bool config_override; u8 tids; u32 mask; enum nl80211_tid_config noack; + u8 retry_long, retry_short; }; /** @@ -4644,6 +4647,8 @@ struct wiphy_iftype_akm_suites { * supported by the driver for each vif * @tid_config_support.peer: bitmap of attributes (configurations) * supported by the driver for each peer + * @tid_config_support.max_retry: maximum supported retry count for + * long/short retry configuration */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4777,8 +4782,11 @@ struct wiphy { struct { u64 peer, vif; + u8 max_retry; } tid_config_support; + u8 max_data_retry_count; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 591d843eda72..c3481e1feebe 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4785,6 +4785,16 @@ enum nl80211_tid_config { * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_RETRY_SHORT: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. 
It is u8 type, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_RETRY_LONG: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4794,6 +4804,8 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_OVERRIDE, NL80211_TID_CONFIG_ATTR_TIDS, NL80211_TID_CONFIG_ATTR_NOACK, + NL80211_TID_CONFIG_ATTR_RETRY_SHORT, + NL80211_TID_CONFIG_ATTR_RETRY_LONG, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 56ac851ccee1..4c79ba685992 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -336,6 +336,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), + [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -1985,6 +1987,14 @@ nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; + /* for now we just use the same value ... makes more sense */ + if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, + rdev->wiphy.tid_config_support.max_retry)) + goto fail; + if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_LONG, + rdev->wiphy.tid_config_support.max_retry)) + goto fail; + nla_nest_end(msg, supp); return 0; @@ -14019,6 +14029,24 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); } + if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT); + tid_conf->retry_short = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]); + + if (tid_conf->retry_short > rdev->wiphy.max_data_retry_count) + return -EINVAL; + } + + if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); + tid_conf->retry_long = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]); + + if (tid_conf->retry_long > rdev->wiphy.max_data_retry_count) + return -EINVAL; + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From ade274b23e41886091a7e105ab3de71baef112e7 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:24 +0530 Subject: nl80211: Add support to configure TID specific AMPDU configuration This patch adds support to configure per TID AMPDU control configuration to enable/disable aggregation through the NL80211_TID_CONFIG_ATTR_AMPDU_CTRL attribute. 
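A driver consuming these settings is expected to look only at the parameters flagged in cfg80211_tid_cfg::mask, whose bit positions are the nl80211 attribute numbers, as parse_tid_conf() shows. Below is a hypothetical driver-side sketch, including the ampdu field this patch introduces; the mydrv_* types and helpers are made up for illustration.

/* Hypothetical sketch: apply only the per-TID parameters userspace set. */
static void mydrv_apply_tid_cfg(struct mydrv_sta *sta,
				const struct cfg80211_tid_cfg *cfg)
{
	if (cfg->mask & BIT(NL80211_TID_CONFIG_ATTR_NOACK))
		mydrv_set_noack(sta, cfg->tids,
				cfg->noack == NL80211_TID_CONFIG_ENABLE);

	if (cfg->mask & BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT))
		mydrv_set_retry_short(sta, cfg->tids, cfg->retry_short);

	if (cfg->mask & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG))
		mydrv_set_retry_long(sta, cfg->tids, cfg->retry_long);

	if (cfg->mask & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL))
		mydrv_set_ampdu(sta, cfg->tids,
				cfg->ampdu == NL80211_TID_CONFIG_ENABLE);
}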
Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-4-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 98981d1a026b..78171ae01406 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -636,6 +636,7 @@ struct cfg80211_chan_def { * @noack: noack configuration value for the TID * @retry_long: retry count value * @retry_short: retry count value + * @ampdu: Enable/Disable aggregation */ struct cfg80211_tid_cfg { bool config_override; @@ -643,6 +644,7 @@ struct cfg80211_tid_cfg { u32 mask; enum nl80211_tid_config noack; u8 retry_long, retry_short; + enum nl80211_tid_config ampdu; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c3481e1feebe..71cae33e6679 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4795,6 +4795,9 @@ enum nl80211_tid_config { * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and * the max value is advertised by the driver in this attribute on * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs + * specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using + * the values from &nl80211_tid_config. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4806,6 +4809,7 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_NOACK, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, NL80211_TID_CONFIG_ATTR_RETRY_LONG, + NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c79ba685992..078d30756b3e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -338,6 +338,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), + [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -14047,6 +14049,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, return -EINVAL; } + if (attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL); + tid_conf->ampdu = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]); + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From 04f7d142f51c6019a695cfd70c09bb60233472c5 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:25 +0530 Subject: nl80211: Add support to configure TID specific RTSCTS configuration This patch adds support to configure per TID RTSCTS control configuration to enable/disable through the NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL attribute. 
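
As a hedged example (also not part of this change), a driver would advertise
support for the new knob by setting the matching bit in the wiphy's
tid_config_support bitmaps before registration; only the wiphy fields and the
NL80211_* names below come from cfg80211, the helper itself is made up:

#include <net/cfg80211.h>

/* Hypothetical probe-time helper: declare which per-TID attributes the
 * driver honours, per interface and per peer.
 */
static void mydrv_advertise_tid_config(struct wiphy *wiphy)
{
        wiphy->tid_config_support.vif |=
                BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL);
        wiphy->tid_config_support.peer |=
                BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL) |
                BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL);
}

parse_tid_conf() checks the requested attributes against these bitmaps, so
requests carrying attributes the driver never advertised are rejected.
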
Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-5-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 78171ae01406..f7c84c32ba39 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -637,6 +637,7 @@ struct cfg80211_chan_def { * @retry_long: retry count value * @retry_short: retry count value * @ampdu: Enable/Disable aggregation + * @rtscts: Enable/Disable RTS/CTS */ struct cfg80211_tid_cfg { bool config_override; @@ -645,6 +646,7 @@ struct cfg80211_tid_cfg { enum nl80211_tid_config noack; u8 retry_long, retry_short; enum nl80211_tid_config ampdu; + enum nl80211_tid_config rtscts; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 71cae33e6679..b002ef2060fa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4798,6 +4798,9 @@ enum nl80211_tid_config { * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs * specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using * the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL: Enable/Disable RTS_CTS for the TIDs + * specified in %NL80211_TID_CONFIG_ATTR_TIDS. It is u8 type, using + * the values from &nl80211_tid_config. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4810,6 +4813,7 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_RETRY_SHORT, NL80211_TID_CONFIG_ATTR_RETRY_LONG, NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, + NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 078d30756b3e..ae5e10fe1196 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -340,6 +340,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -14055,6 +14057,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]); } + if (attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL); + tid_conf->rtscts = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]); + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From 370f51d5edac83bfdb9a078d7098f06403dfa4bc Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:27 +0530 Subject: mac80211: Add api to support configuring TID specific configuration Implement drv_set_tid_config api to allow TID specific configuration and drv_reset_tid_config api to reset peer specific TID configuration. This per-TID onfiguration will be applied for all the connected stations when MAC is NULL. 
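
For illustration (not part of this change), a minimal low-level driver hook
for the new op could look as follows; everything prefixed mydrv_ is
hypothetical, only the callback signature matches the ieee80211_ops member
added below:

#include <net/mac80211.h>

/* Hedged sketch of a set_tid_config driver op. A NULL sta means the
 * configuration applies to all connected stations, as described above.
 * mydrv_priv and mydrv_fw_send_tid_config() are made-up driver pieces.
 */
struct mydrv_priv;
int mydrv_fw_send_tid_config(struct mydrv_priv *priv,
                             struct ieee80211_vif *vif,
                             struct ieee80211_sta *sta,
                             struct cfg80211_tid_config *tid_conf);

static int mydrv_set_tid_config(struct ieee80211_hw *hw,
                                struct ieee80211_vif *vif,
                                struct ieee80211_sta *sta,
                                struct cfg80211_tid_config *tid_conf)
{
        struct mydrv_priv *priv = hw->priv;

        return mydrv_fw_send_tid_config(priv, vif, sta, tid_conf);
}

static const struct ieee80211_ops mydrv_ops = {
        /* ... the driver's other callbacks ... */
        .set_tid_config = mydrv_set_tid_config,
        /* .reset_tid_config would be wired up the same way */
};

If the driver leaves these ops unset, ieee80211_set_tid_config() simply
returns -EOPNOTSUPP, as the cfg.c hunk below shows.
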
Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-7-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 10 +++++++++ net/mac80211/cfg.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 27 +++++++++++++++++++++++ 3 files changed, 93 insertions(+) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ce80773dfeb2..5ef34a2ba3c7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3776,6 +3776,9 @@ enum ieee80211_reconfig_type { * * @start_pmsr: start peer measurement (e.g. FTM) (this call can sleep) * @abort_pmsr: abort peer measurement (this call can sleep) + * @set_tid_config: Apply TID specific configurations. This callback may sleep. + * @reset_tid_config: Reset TID specific configuration for the peer. + * This callback may sleep. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4080,6 +4083,13 @@ struct ieee80211_ops { struct cfg80211_pmsr_request *request); void (*abort_pmsr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct cfg80211_pmsr_request *request); + int (*set_tid_config)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct cfg80211_tid_config *tid_conf); + int (*reset_tid_config)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, u8 tids); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a762ba6e4c5d..7b654d2b8bb2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3886,6 +3886,60 @@ ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev, return drv_abort_pmsr(local, sdata, request); } +static int ieee80211_set_tid_config(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_tid_config *tid_conf) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct sta_info *sta; + int ret; + + if (!sdata->local->ops->set_tid_config) + return -EOPNOTSUPP; + + if (!tid_conf->peer) + return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); + + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get_bss(sdata, tid_conf->peer); + if (!sta) { + mutex_unlock(&sdata->local->sta_mtx); + return -ENOENT; + } + + ret = drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); + mutex_unlock(&sdata->local->sta_mtx); + + return ret; +} + +static int ieee80211_reset_tid_config(struct wiphy *wiphy, + struct net_device *dev, + const u8 *peer, u8 tid) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct sta_info *sta; + int ret; + + if (!sdata->local->ops->reset_tid_config) + return -EOPNOTSUPP; + + if (!peer) + return drv_reset_tid_config(sdata->local, sdata, NULL, tid); + + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get_bss(sdata, peer); + if (!sta) { + mutex_unlock(&sdata->local->sta_mtx); + return -ENOENT; + } + + ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tid); + mutex_unlock(&sdata->local->sta_mtx); + + return ret; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3986,4 +4040,6 @@ const struct cfg80211_ops mac80211_config_ops = { .start_pmsr = ieee80211_start_pmsr, .abort_pmsr = ieee80211_abort_pmsr, .probe_mesh_link = ieee80211_probe_mesh_link, + .set_tid_config = ieee80211_set_tid_config, + .reset_tid_config = ieee80211_reset_tid_config, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 
2c9b3eb8b652..3877710e3b48 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1358,4 +1358,31 @@ static inline void drv_del_nan_func(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int drv_set_tid_config(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct cfg80211_tid_config *tid_conf) +{ + int ret; + + might_sleep(); + ret = local->ops->set_tid_config(&local->hw, &sdata->vif, sta, + tid_conf); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_reset_tid_config(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, u8 tid) +{ + int ret; + + might_sleep(); + ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tid); + trace_drv_return_int(local, ret); + + return ret; +} #endif /* __MAC80211_DRIVER_OPS */ -- cgit v1.2.3 From 366ed1aca6e02a90eff5387bd6ace34eba7e64cf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 20 Feb 2020 22:03:13 -0700 Subject: net: Remove unneeded export of a couple of xdp generic functions generic_xdp_tx and xdp_do_generic_redirect are only used by builtin code, so remove the EXPORT_SYMBOL_GPL for them. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/dev.c | 1 - net/core/filter.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e10bd680dc03..4770dde3448d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4636,7 +4636,6 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) kfree_skb(skb); } } -EXPORT_SYMBOL_GPL(generic_xdp_tx); static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); diff --git a/net/core/filter.c b/net/core/filter.c index 925b23de218b..4a08c9fb2be7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3626,7 +3626,6 @@ err: _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } -EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) { -- cgit v1.2.3 From ecd942a0ef3a30f6037870bfc0a294d7e9fe9d4f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 24 Feb 2020 08:35:47 +0100 Subject: devlink: add ACL generic packet traps Add packet traps that can report packets that were dropped during ACL processing. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-trap.rst | 9 +++++++++ include/net/devlink.h | 9 +++++++++ net/core/devlink.c | 3 +++ 3 files changed, 21 insertions(+) (limited to 'net') diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index 47a429bb8658..63350e7332ce 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -238,6 +238,12 @@ be added to the following table: - ``drop`` - Traps NVE packets that the device decided to drop because their overlay source MAC is multicast + * - ``ingress_flow_action_drop`` + - ``drop`` + - Traps packets dropped during processing of ingress flow action drop + * - ``egress_flow_action_drop`` + - ``drop`` + - Traps packets dropped during processing of egress flow action drop Driver-specific Packet Traps ============================ @@ -277,6 +283,9 @@ narrow. 
The description of these groups must be added to the following table: * - ``tunnel_drops`` - Contains packet traps for packets that were dropped by the device during tunnel encapsulation / decapsulation + * - ``acl_drops`` + - Contains packet traps for packets that were dropped by the device during + ACL processing Testing ======= diff --git a/include/net/devlink.h b/include/net/devlink.h index 149c108be66f..07923e619206 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -596,6 +596,8 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE, DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, + DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP, + DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -610,6 +612,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS, DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS, DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS, /* Add new generic trap group IDs above */ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, @@ -671,6 +674,10 @@ enum devlink_trap_group_generic_id { "decap_error" #define DEVLINK_TRAP_GENERIC_NAME_OVERLAY_SMAC_MC \ "overlay_smac_is_mc" +#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_FLOW_ACTION_DROP \ + "ingress_flow_action_drop" +#define DEVLINK_TRAP_GENERIC_NAME_EGRESS_FLOW_ACTION_DROP \ + "egress_flow_action_drop" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -680,6 +687,8 @@ enum devlink_trap_group_generic_id { "buffer_drops" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_TUNNEL_DROPS \ "tunnel_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_DROPS \ + "acl_drops" #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \ { \ diff --git a/net/core/devlink.c b/net/core/devlink.c index 216bdd25ce39..0d7c5d3443d2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7795,6 +7795,8 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(NON_ROUTABLE, DROP), DEVLINK_TRAP(DECAP_ERROR, EXCEPTION), DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP), + DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP), + DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ @@ -7808,6 +7810,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(L3_DROPS), DEVLINK_TRAP_GROUP(BUFFER_DROPS), DEVLINK_TRAP_GROUP(TUNNEL_DROPS), + DEVLINK_TRAP_GROUP(ACL_DROPS), }; static int devlink_trap_generic_verify(const struct devlink_trap *trap) -- cgit v1.2.3 From 958a93c15466c69e2ec531332e67011f549943bd Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Fri, 21 Feb 2020 20:45:38 +0530 Subject: tcp, ulp: Pass lockdep expression to RCU lists tcp_ulp_list is traversed using list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of tcp_ulp_list_lock. Hence, add corresponding lockdep expression to silence false-positive warnings, and harden RCU lists.t Signed-off-by: Amol Grover Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ulp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 6c43fa189195..2703f24c5d1a 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -22,7 +22,8 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) { struct tcp_ulp_ops *e; - list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + list_for_each_entry_rcu(e, &tcp_ulp_list, list, + lockdep_is_held(&tcp_ulp_list_lock)) { if (strcmp(e->name, name) == 0) return e; } -- cgit v1.2.3 From 0a087bf232c35dbec3769c4402ca737995d7b734 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Fri, 21 Feb 2020 21:49:47 +0530 Subject: net: 802: psnap.c: Use built-in RCU list checking list_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/802/psnap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index 40ab2aea7b31..4492e8d7ad20 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -30,7 +30,7 @@ static struct datalink_proto *find_snap_client(const unsigned char *desc) { struct datalink_proto *proto = NULL, *p; - list_for_each_entry_rcu(p, &snap_list, node) { + list_for_each_entry_rcu(p, &snap_list, node, lockdep_is_held(&snap_lock)) { if (!memcmp(p->type, desc, 5)) { proto = p; break; -- cgit v1.2.3 From c8b91770f54a1e832e086b3768fe115583128519 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Fri, 21 Feb 2020 23:27:14 +0530 Subject: tcp: ipv4: Pass lockdep expression to RCU lists md5sig->head maybe traversed using hlist_for_each_entry_rcu outside an RCU read-side critical section but under the protection of socket lock. Hence, add corresponding lockdep expression to silence false-positive warnings, and harden RCU lists. Signed-off-by: Amol Grover Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index df1166b76126..52acf0bc2ee5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1019,7 +1019,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, if (!md5sig) return NULL; - hlist_for_each_entry_rcu(key, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node, + lockdep_sock_is_held(sk)) { if (key->family != family) continue; if (key->l3index && key->l3index != l3index) @@ -1064,7 +1065,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node, + lockdep_sock_is_held(sk)) { if (key->family != family) continue; if (key->l3index && key->l3index != l3index) -- cgit v1.2.3 From 28b380e28925cad4ccc70b1cd2faef7aa7ba707d Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sat, 22 Feb 2020 22:27:27 +0530 Subject: ip6mr: Fix RCU list debugging warning ip6mr_for_each_table() macro uses list_for_each_entry_rcu() for traversing outside an RCU read side critical section but under the protection of rtnl_mutex. 
Hence add the corresponding lockdep expression to silence the following false-positive warnings: [ 4.319479] ============================= [ 4.319480] WARNING: suspicious RCU usage [ 4.319482] 5.5.4-stable #17 Tainted: G E [ 4.319483] ----------------------------- [ 4.319485] net/ipv6/ip6mr.c:1243 RCU-list traversed in non-reader section!! [ 4.456831] ============================= [ 4.456832] WARNING: suspicious RCU usage [ 4.456834] 5.5.4-stable #17 Tainted: G E [ 4.456835] ----------------------------- [ 4.456837] net/ipv6/ip6mr.c:1582 RCU-list traversed in non-reader section!! Signed-off-by: Amol Grover Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index bfa49ff70531..d6483926f449 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -97,7 +97,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) + list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ + lockdep_rtnl_is_held()) static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) -- cgit v1.2.3 From 887cf3d139347fed6e0a11b08cf7ed21f6f0fc3b Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:45 +0000 Subject: sctp: Add missing annotation for sctp_err_finish() Sparse reports a warning at sctp_err_finish() warning: context imbalance in sctp_err_finish() - unexpected unlock The root cause is a missing annotation at sctp_err_finish() Add the missing __releases(&((__sk)->sk_lock.slock)) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/sctp/input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index efaaefc3bb1c..55d4fc6f371d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -548,6 +548,7 @@ out: /* Common cleanup code for icmp/icmpv6 error handler. */ void sctp_err_finish(struct sock *sk, struct sctp_transport *t) + __releases(&((__sk)->sk_lock.slock)) { bh_unlock_sock(sk); sctp_transport_put(t); -- cgit v1.2.3 From 6c72b7740c8665671c0d5d3001490c0e41939c1f Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:46 +0000 Subject: sctp: Add missing annotation for sctp_transport_walk_start() Sparse reports a warning at sctp_transport_walk_start() warning: context imbalance in sctp_transport_walk_start - wrong count at exit The root cause is the missing annotation at sctp_transport_walk_start() Add the missing __acquires(RCU) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1b56fc440606..05be67bb0474 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5333,7 +5333,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, EXPORT_SYMBOL_GPL(sctp_get_sctp_info); /* use callback to avoid exporting the core structure */ -void sctp_transport_walk_start(struct rhashtable_iter *iter) +void sctp_transport_walk_start(struct rhashtable_iter *iter) __acquires(RCU) { rhltable_walk_enter(&sctp_transport_hashtable, iter); -- cgit v1.2.3 From b77b4f634e5f7b0477e682ad643cbad43b7d9d93 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:47 +0000 Subject: sctp: Add missing annotation for sctp_transport_walk_stop() Sparse reports a warning at sctp_transport_walk_stop() warning: context imbalance in sctp_transport_walk_stop - wrong count at exit The root cause is the missing annotation at sctp_transport_walk_stop() Add the missing __releases(RCU) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 05be67bb0474..fed26a1e9518 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5340,7 +5340,7 @@ void sctp_transport_walk_start(struct rhashtable_iter *iter) __acquires(RCU) rhashtable_walk_start(iter); } -void sctp_transport_walk_stop(struct rhashtable_iter *iter) +void sctp_transport_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { rhashtable_walk_stop(iter); rhashtable_walk_exit(iter); -- cgit v1.2.3 From 8e0f8ccfb0d290eaac3db086c600f4522584c7f2 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:48 +0000 Subject: net: Add missing annotation for llc_seq_start() Sparse reports a warning at llc_seq_start() warning: context imbalance in llc_seq_start() - wrong count at exit The root cause is the msiing annotation at llc_seq_start() Add the missing __acquires(RCU) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/llc/llc_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index f3a36c16a5e7..a4eccb98220a 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -56,7 +56,7 @@ found: return sk; } -static void *llc_seq_start(struct seq_file *seq, loff_t *pos) +static void *llc_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { loff_t l = *pos; -- cgit v1.2.3 From d087f183787e43bb803e6f529cb97b0bf6743bea Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:49 +0000 Subject: netrom: Add missing annotation for nr_info_start() Sparse reports a warning at nr_info_start() warning: context imbalance in nr_info_start() - wrong count at exit The root cause is the missing annotation at nr_info_start() Add the missing __acquires(&nr_list_lock) Signed-off-by: Jules Irenge Signed-off-by: David S. 
Miller --- net/netrom/af_netrom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 58d5373c513c..8be06e61ff03 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1230,6 +1230,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #ifdef CONFIG_PROC_FS static void *nr_info_start(struct seq_file *seq, loff_t *pos) + __acquires(&nr_list_lock) { spin_lock_bh(&nr_list_lock); return seq_hlist_start_head(&nr_list, *pos); -- cgit v1.2.3 From 8b003f0d5c2e98d3748ed1f642030ddeac9e6481 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:50 +0000 Subject: netrom: Add missing annotation for nr_info_stop() Sparse reports a warning at nr_info_stop() warning: context imbalance in nr_info_stop() - unexpected unlock The root cause is the missing annotation at nr_info_stop() Add the missing __releases(&nr_list_lock) Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 8be06e61ff03..7b1a74f74aad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1242,6 +1242,7 @@ static void *nr_info_next(struct seq_file *seq, void *v, loff_t *pos) } static void nr_info_stop(struct seq_file *seq, void *v) + __releases(&nr_list_lock) { spin_unlock_bh(&nr_list_lock); } -- cgit v1.2.3 From 5018adfd7a18219c77eeb255555f7d7052819542 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:51 +0000 Subject: net: netrom: Add missing annotation for nr_node_start() Sparse reports a warning at nr_node_start() warning: context imbalance in nr_node_start() - wrong count at exit The root cause is the missing annotation at nr_node_start() Add the missing __acquires(&nr_node_list_lock) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index d41335bad1f8..fe278fc24153 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -838,6 +838,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) #ifdef CONFIG_PROC_FS static void *nr_node_start(struct seq_file *seq, loff_t *pos) + __acquires(&nr_node_list_lock) { spin_lock_bh(&nr_node_list_lock); return seq_hlist_start_head(&nr_node_list, *pos); -- cgit v1.2.3 From 0eb713fb667de4be60ad645f88de6e5bd15d41df Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:52 +0000 Subject: net: netrom: Add missing annotation for nr_node_stop() Sparse reports a warning at nr_node_stop() warning: context imbalance in nr_node_stop() - wrong count at exit The root cause is the missing annotation at nr_node_stop() Add the missing __releases(&nr_node_list_lock) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. 
Miller --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index fe278fc24153..637a743c060d 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -850,6 +850,7 @@ static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos) } static void nr_node_stop(struct seq_file *seq, void *v) + __releases(&nr_node_list_lock) { spin_unlock_bh(&nr_node_list_lock); } -- cgit v1.2.3 From 2d6b6acfce5f0b1e0241251d2a7aaf570e149809 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:53 +0000 Subject: net: netrom: Add missing annotation for nr_neigh_start() Sparse reports a warning at nr_neigh_start() warning: context imbalance in nr_neigh_start() - wrong count at exit The root cause is the missing annotation at nr_neigh_start() Add the missing __acquires(&nr_neigh_list_lock) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 637a743c060d..33e7b91fc805 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -895,6 +895,7 @@ const struct seq_operations nr_node_seqops = { }; static void *nr_neigh_start(struct seq_file *seq, loff_t *pos) + __acquires(&nr_neigh_list_lock) { spin_lock_bh(&nr_neigh_list_lock); return seq_hlist_start_head(&nr_neigh_list, *pos); -- cgit v1.2.3 From be21139f3539350b33112e88183992e1ee086f53 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:54 +0000 Subject: net: netrom: Add missing annotation for nr_neigh_stop() Sparse reports a warning at nr_neigh_stop() warning: context imbalance in nr_neigh_stop() - unexpected unlock The root cause is the missing annotation at nr_neigh_stop() Add the missing __releases(&nr_neigh_list_lock) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 33e7b91fc805..79f12d8c7b86 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -907,6 +907,7 @@ static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos) } static void nr_neigh_stop(struct seq_file *seq, void *v) + __releases(&nr_neigh_list_lock) { spin_unlock_bh(&nr_neigh_list_lock); } -- cgit v1.2.3 From 3283ff2ea7ffd02decb1a813b97d4bbcdbb4e66a Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:55 +0000 Subject: dccp: Add missing annotation for dccp_child_process() Sparse reports a warning at dccp_child_process() warning: context imbalance in dccp_child_process() - unexpected unlock The root cause is the missing annotation at dccp_child_process() Add the missing __releases(child) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. 
Miller --- net/dccp/minisocks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 25187528c308..c5c74a34d139 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -216,6 +216,7 @@ EXPORT_SYMBOL_GPL(dccp_check_req); */ int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) + __releases(child) { int ret = 0; const int state = child->sk_state; -- cgit v1.2.3 From 48851e9e802de3ede4ce81c3f7896c63e841adbc Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 23 Feb 2020 23:16:56 +0000 Subject: af_unix: Add missing annotation for unix_wait_for_peer() Sparse reports a warning unix_wait_for_peer() warning: context imbalance in unix_wait_for_peer() - unexpected unlock The root cause is the missing annotation at unix_wait_for_peer() Add the missing annotation __releases(&unix_sk(other)->lock) Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 62c12cb5763e..cbd7dc01e147 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1207,6 +1207,7 @@ out: } static long unix_wait_for_peer(struct sock *other, long timeo) + __releases(&unix_sk(other)->lock) { struct unix_sock *u = unix_sk(other); int sched; -- cgit v1.2.3 From 571912c69f0ed731bd1e071ade9dc7ca4aa52065 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 24 Feb 2020 10:57:50 +0530 Subject: net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc. The Bareudp tunnel module provides a generic L3 encapsulation tunnelling module for tunnelling different protocols like MPLS, IP,NSH etc inside a UDP tunnel. Signed-off-by: Martin Varghese Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 34 ++ Documentation/networking/index.rst | 1 + drivers/net/Kconfig | 13 + drivers/net/Makefile | 1 + drivers/net/bareudp.c | 743 +++++++++++++++++++++++++++++++++++ include/net/bareudp.h | 19 + include/net/ipv6.h | 6 + include/net/route.h | 6 + include/uapi/linux/if_link.h | 11 + net/ipv4/route.c | 48 +++ net/ipv6/ip6_output.c | 70 ++++ 11 files changed, 952 insertions(+) create mode 100644 Documentation/networking/bareudp.rst create mode 100644 drivers/net/bareudp.c create mode 100644 include/net/bareudp.h (limited to 'net') diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst new file mode 100644 index 000000000000..6ee68c16cf5b --- /dev/null +++ b/Documentation/networking/bareudp.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +Bare UDP Tunnelling Module Documentation +======================================== + +There are various L3 encapsulation standards using UDP being discussed to +leverage the UDP based load balancing capability of different networks. +MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. + +The Bareudp tunnel module provides a generic L3 encapsulation tunnelling +support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside +a UDP tunnel. + +Usage +------ + +1) Device creation & deletion + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847. + + This creates a bareudp tunnel device which tunnels L3 traffic with ethertype + 0x8847 (MPLS traffic). 
The destination port of the UDP header will be set to + 6635.The device will listen on UDP port 6635 to receive traffic. + + b) ip link delete bareudp0 + +2) Device Usage + +The bareudp device could be used along with OVS or flower filter in TC. +The OVS or TC flower layer must set the tunnel information in SKB dst field before +sending packet buffer to the bareudp device for transmission. On reception the +bareudp device extracts and stores the tunnel information in SKB dst field before +passing the packet buffer to the network stack. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d07d9855dcd3..3a83cfb66704 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -8,6 +8,7 @@ Contents: netdev-FAQ af_xdp + bareudp batman-adv can can_ucan_protocol diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 25a8f9387d5a..66e410e58c8e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -258,6 +258,19 @@ config GENEVE To compile this driver as a module, choose M here: the module will be called geneve. +config BAREUDP + tristate "Bare UDP Encapsulation" + depends on INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select GRO_CELLS + help + This adds a bare UDP tunnel module for tunnelling different + kinds of traffic like MPLS, IP, etc. inside a UDP tunnel. + + To compile this driver as a module, choose M here: the module + will be called bareudp. + config GTP tristate "GPRS Tunneling Protocol datapath (GTP-U)" depends on INET diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 71b88ffc5587..65967246f240 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o obj-$(CONFIG_GENEVE) += geneve.o +obj-$(CONFIG_BAREUDP) += bareudp.o obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c new file mode 100644 index 000000000000..32518969139e --- /dev/null +++ b/drivers/net/bareudp.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Bareudp: UDP tunnel encasulation for different Payload types like + * MPLS, NSH, IP, etc. + * Copyright (c) 2019 Nokia, Inc. 
+ * Authors: Martin Varghese, + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BAREUDP_BASE_HLEN sizeof(struct udphdr) +#define BAREUDP_IPV4_HLEN (sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define BAREUDP_IPV6_HLEN (sizeof(struct ipv6hdr) + \ + sizeof(struct udphdr)) + +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + +/* per-network namespace private data for this module */ + +static unsigned int bareudp_net_id; + +struct bareudp_net { + struct list_head bareudp_list; +}; + +/* Pseudo network device */ +struct bareudp_dev { + struct net *net; /* netns for packet i/o */ + struct net_device *dev; /* netdev for bareudp tunnel */ + __be16 ethertype; + __be16 port; + u16 sport_min; + struct socket __rcu *sock; + struct list_head next; /* bareudp node on namespace list */ + struct gro_cells gro_cells; +}; + +static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct metadata_dst *tun_dst = NULL; + struct pcpu_sw_netstats *stats; + struct bareudp_dev *bareudp; + unsigned short family; + unsigned int len; + __be16 proto; + void *oiph; + int err; + + bareudp = rcu_dereference_sk_user_data(sk); + if (!bareudp) + goto drop; + + if (skb->protocol == htons(ETH_P_IP)) + family = AF_INET; + else + family = AF_INET6; + + proto = bareudp->ethertype; + + if (iptunnel_pull_header(skb, BAREUDP_BASE_HLEN, + proto, + !net_eq(bareudp->net, + dev_net(bareudp->dev)))) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + + tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); + if (!tun_dst) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + skb_dst_set(skb, &tun_dst->dst); + skb->dev = bareudp->dev; + oiph = skb_network_header(skb); + skb_reset_network_header(skb); + + if (family == AF_INET) + err = IP_ECN_decapsulate(oiph, skb); +#if IS_ENABLED(CONFIG_IPV6) + else + err = IP6_ECN_decapsulate(oiph, skb); +#endif + + if (unlikely(err)) { + if (log_ecn_error) { + if (family == AF_INET) + net_info_ratelimited("non-ECT from %pI4 " + "with TOS=%#x\n", + &((struct iphdr *)oiph)->saddr, + ((struct iphdr *)oiph)->tos); +#if IS_ENABLED(CONFIG_IPV6) + else + net_info_ratelimited("non-ECT from %pI6\n", + &((struct ipv6hdr *)oiph)->saddr); +#endif + } + if (err > 1) { + ++bareudp->dev->stats.rx_frame_errors; + ++bareudp->dev->stats.rx_errors; + goto drop; + } + } + + len = skb->len; + err = gro_cells_receive(&bareudp->gro_cells, skb); + if (likely(err == NET_RX_SUCCESS)) { + stats = this_cpu_ptr(bareudp->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); + } + return 0; +drop: + /* Consume bad packet */ + kfree_skb(skb); + + return 0; +} + +static int bareudp_err_lookup(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + +static int bareudp_init(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&bareudp->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; +} + +static void bareudp_uninit(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + gro_cells_destroy(&bareudp->gro_cells); + free_percpu(dev->tstats); +} + 
+static struct socket *bareudp_create_sock(struct net *net, __be16 port) +{ + struct udp_port_cfg udp_conf; + struct socket *sock; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); +#if IS_ENABLED(CONFIG_IPV6) + udp_conf.family = AF_INET6; +#else + udp_conf.family = AF_INET; +#endif + udp_conf.local_udp_port = port; + /* Open UDP socket */ + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + return ERR_PTR(err); + + return sock; +} + +/* Create new listen socket if needed */ +static int bareudp_socket_create(struct bareudp_dev *bareudp, __be16 port) +{ + struct udp_tunnel_sock_cfg tunnel_cfg; + struct socket *sock; + + sock = bareudp_create_sock(bareudp->net, port); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + /* Mark socket as an encapsulation socket */ + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.sk_user_data = bareudp; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = bareudp_udp_encap_recv; + tunnel_cfg.encap_err_lookup = bareudp_err_lookup; + tunnel_cfg.encap_destroy = NULL; + setup_udp_tunnel_sock(bareudp->net, sock, &tunnel_cfg); + + if (sock->sk->sk_family == AF_INET6) + udp_encap_enable(); + + rcu_assign_pointer(bareudp->sock, sock); + return 0; +} + +static int bareudp_open(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + int ret = 0; + + ret = bareudp_socket_create(bareudp, bareudp->port); + return ret; +} + +static void bareudp_sock_release(struct bareudp_dev *bareudp) +{ + struct socket *sock; + + sock = bareudp->sock; + rcu_assign_pointer(bareudp->sock, NULL); + synchronize_net(); + udp_tunnel_sock_release(sock); +} + +static int bareudp_stop(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + bareudp_sock_release(bareudp); + return 0; +} + +static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct bareudp_dev *bareudp, + const struct ip_tunnel_info *info) +{ + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); + struct socket *sock = rcu_dereference(bareudp->sock); + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + const struct ip_tunnel_key *key = &info->key; + struct rtable *rt; + __be16 sport, df; + int min_headroom; + __u8 tos, ttl; + __be32 saddr; + int err; + + if (!sock) + return -ESHUTDOWN; + + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info, + IPPROTO_UDP, use_cache); + + if (IS_ERR(rt)) + return PTR_ERR(rt); + + skb_tunnel_check_pmtu(skb, &rt->dst, + BAREUDP_IPV4_HLEN + info->options_len); + + sport = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, USHRT_MAX, + true); + tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; + skb_scrub_packet(skb, xnet); + + if (!skb_pull(skb, skb_network_offset(skb))) + goto free_dst; + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + goto free_dst; + + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) + goto free_dst; + + skb_set_inner_protocol(skb, bareudp->ethertype); + udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, + tos, ttl, df, sport, bareudp->port, + !net_eq(bareudp->net, dev_net(bareudp->dev)), + !(info->key.tun_flags & TUNNEL_CSUM)); + return 0; + +free_dst: + dst_release(&rt->dst); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct bareudp_dev *bareudp, + const struct ip_tunnel_info *info) +{ + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); + struct socket *sock = rcu_dereference(bareudp->sock); + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + const struct ip_tunnel_key *key = &info->key; + struct dst_entry *dst = NULL; + struct in6_addr saddr, daddr; + int min_headroom; + __u8 prio, ttl; + __be16 sport; + int err; + + if (!sock) + return -ESHUTDOWN; + + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, &saddr, info, + IPPROTO_UDP, use_cache); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len); + + sport = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, USHRT_MAX, + true); + prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + + skb_scrub_packet(skb, xnet); + + if (!skb_pull(skb, skb_network_offset(skb))) + goto free_dst; + + min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + goto free_dst; + + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) + goto free_dst; + + daddr = info->key.u.ipv6.dst; + udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, + &saddr, &daddr, prio, ttl, + info->key.label, sport, bareudp->port, + !(info->key.tun_flags & TUNNEL_CSUM)); + return 0; + +free_dst: + dst_release(dst); + return err; +} +#endif + +static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + struct ip_tunnel_info *info = NULL; + int err; + + if (skb->protocol != bareudp->ethertype) { + err = -EINVAL; + goto tx_error; + } + + info = skb_tunnel_info(skb); + if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { + err = -EINVAL; + goto tx_error; + } + + rcu_read_lock(); +#if IS_ENABLED(CONFIG_IPV6) + if (info->mode & IP_TUNNEL_INFO_IPV6) + err = bareudp6_xmit_skb(skb, dev, bareudp, info); + else +#endif + err = bareudp_xmit_skb(skb, dev, bareudp, info); + + rcu_read_unlock(); + + if (likely(!err)) + return NETDEV_TX_OK; +tx_error: + dev_kfree_skb(skb); + + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) + dev->stats.tx_carrier_errors++; + + dev->stats.tx_errors++; + return NETDEV_TX_OK; +} + +static int bareudp_fill_metadata_dst(struct net_device *dev, + struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct bareudp_dev *bareudp = netdev_priv(dev); + bool use_cache; + + use_cache = ip_tunnel_dst_cache_usable(skb, info); + + if 
(ip_tunnel_info_af(info) == AF_INET) { + struct rtable *rt; + __be32 saddr; + + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, + info, IPPROTO_UDP, use_cache); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = saddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_tunnel_info_af(info) == AF_INET6) { + struct dst_entry *dst; + struct in6_addr saddr; + struct socket *sock = rcu_dereference(bareudp->sock); + + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, + &saddr, info, IPPROTO_UDP, + use_cache); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + dst_release(dst); + info->key.u.ipv6.src = saddr; +#endif + } else { + return -EINVAL; + } + + info->key.tp_src = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, + USHRT_MAX, true); + info->key.tp_dst = bareudp->port; + return 0; +} + +static const struct net_device_ops bareudp_netdev_ops = { + .ndo_init = bareudp_init, + .ndo_uninit = bareudp_uninit, + .ndo_open = bareudp_open, + .ndo_stop = bareudp_stop, + .ndo_start_xmit = bareudp_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, +}; + +static const struct nla_policy bareudp_policy[IFLA_BAREUDP_MAX + 1] = { + [IFLA_BAREUDP_PORT] = { .type = NLA_U16 }, + [IFLA_BAREUDP_ETHERTYPE] = { .type = NLA_U16 }, + [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NLA_U16 }, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type bareudp_type = { + .name = "bareudp", +}; + +/* Initialize the device structure. */ +static void bareudp_setup(struct net_device *dev) +{ + dev->netdev_ops = &bareudp_netdev_ops; + dev->needs_free_netdev = true; + SET_NETDEV_DEVTYPE(dev, &bareudp_type); + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->mtu = ETH_DATA_LEN; + dev->min_mtu = IPV4_MIN_MTU; + dev->max_mtu = IP_MAX_MTU - BAREUDP_BASE_HLEN; + dev->type = ARPHRD_NONE; + netif_keep_dst(dev); + dev->priv_flags |= IFF_NO_QUEUE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; +} + +static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (!data) { + NL_SET_ERR_MSG(extack, + "Not enough attributes provided to perform the operation"); + return -EINVAL; + } + return 0; +} + +static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf) +{ + if (!data[IFLA_BAREUDP_PORT] || !data[IFLA_BAREUDP_ETHERTYPE]) + return -EINVAL; + + if (data[IFLA_BAREUDP_PORT]) + conf->port = nla_get_u16(data[IFLA_BAREUDP_PORT]); + + if (data[IFLA_BAREUDP_ETHERTYPE]) + conf->ethertype = nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]); + + if (data[IFLA_BAREUDP_SRCPORT_MIN]) + conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); + + return 0; +} + +static struct bareudp_dev *bareudp_find_dev(struct bareudp_net *bn, + const struct bareudp_conf *conf) +{ + struct bareudp_dev *bareudp, *t = NULL; + + list_for_each_entry(bareudp, &bn->bareudp_list, next) { + if (conf->port == bareudp->port) + t = bareudp; + } + return t; +} + +static int bareudp_configure(struct net *net, struct net_device *dev, + struct bareudp_conf *conf) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + struct bareudp_dev *t, *bareudp = netdev_priv(dev); + int err; + + bareudp->net = net; + bareudp->dev = dev; + t 
= bareudp_find_dev(bn, conf); + if (t) + return -EBUSY; + + bareudp->port = conf->port; + bareudp->ethertype = conf->ethertype; + bareudp->sport_min = conf->sport_min; + err = register_netdevice(dev); + if (err) + return err; + + list_add(&bareudp->next, &bn->bareudp_list); + return 0; +} + +static int bareudp_link_config(struct net_device *dev, + struct nlattr *tb[]) +{ + int err; + + if (tb[IFLA_MTU]) { + err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + if (err) + return err; + } + return 0; +} + +static int bareudp_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct bareudp_conf conf; + int err; + + err = bareudp2info(data, &conf); + if (err) + return err; + + err = bareudp_configure(net, dev, &conf); + if (err) + return err; + + err = bareudp_link_config(dev, tb); + if (err) + return err; + + return 0; +} + +static void bareudp_dellink(struct net_device *dev, struct list_head *head) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + list_del(&bareudp->next); + unregister_netdevice_queue(dev, head); +} + +static size_t bareudp_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_PORT */ + nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */ + nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */ + 0; +} + +static int bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + if (nla_put_be16(skb, IFLA_BAREUDP_PORT, bareudp->port)) + goto nla_put_failure; + if (nla_put_be16(skb, IFLA_BAREUDP_ETHERTYPE, bareudp->ethertype)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BAREUDP_SRCPORT_MIN, bareudp->sport_min)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops bareudp_link_ops __read_mostly = { + .kind = "bareudp", + .maxtype = IFLA_BAREUDP_MAX, + .policy = bareudp_policy, + .priv_size = sizeof(struct bareudp_dev), + .setup = bareudp_setup, + .validate = bareudp_validate, + .newlink = bareudp_newlink, + .dellink = bareudp_dellink, + .get_size = bareudp_get_size, + .fill_info = bareudp_fill_info, +}; + +struct net_device *bareudp_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct bareudp_conf *conf) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + LIST_HEAD(list_kill); + int err; + + memset(tb, 0, sizeof(tb)); + dev = rtnl_create_link(net, name, name_assign_type, + &bareudp_link_ops, tb, NULL); + if (IS_ERR(dev)) + return dev; + + err = bareudp_configure(net, dev, conf); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + err = dev_set_mtu(dev, IP_MAX_MTU - BAREUDP_BASE_HLEN); + if (err) + goto err; + + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto err; + + return dev; +err: + bareudp_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(bareudp_dev_create); + +static __net_init int bareudp_init_net(struct net *net) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + + INIT_LIST_HEAD(&bn->bareudp_list); + return 0; +} + +static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + struct bareudp_dev *bareudp, *next; + + list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next) + unregister_netdevice_queue(bareudp->dev, head); +} + +static void __net_exit 
bareudp_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + bareudp_destroy_tunnels(net, &list); + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations bareudp_net_ops = { + .init = bareudp_init_net, + .exit_batch = bareudp_exit_batch_net, + .id = &bareudp_net_id, + .size = sizeof(struct bareudp_net), +}; + +static int __init bareudp_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&bareudp_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&bareudp_link_ops); + if (rc) + goto out2; + + return 0; +out2: + unregister_pernet_subsys(&bareudp_net_ops); +out1: + return rc; +} +late_initcall(bareudp_init_module); + +static void __exit bareudp_cleanup_module(void) +{ + rtnl_link_unregister(&bareudp_link_ops); + unregister_pernet_subsys(&bareudp_net_ops); +} +module_exit(bareudp_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Varghese "); +MODULE_DESCRIPTION("Interface driver for UDP encapsulated traffic"); diff --git a/include/net/bareudp.h b/include/net/bareudp.h new file mode 100644 index 000000000000..513fae6f6ba1 --- /dev/null +++ b/include/net/bareudp.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NET_BAREUDP_H +#define __NET_BAREUDP_H + +#include +#include + +struct bareudp_conf { + __be16 ethertype; + __be16 port; + u16 sport_min; +}; + +struct net_device *bareudp_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct bareudp_conf *info); + +#endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cec1a54401f2..1bf8065fe871 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1027,6 +1027,12 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); +struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); diff --git a/include/net/route.h b/include/net/route.h index a9c60fc68e36..81750ae50833 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -128,6 +128,12 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); + struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 024af2d1d0af..fb4b33af23d5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -590,6 +590,17 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ebe7060d0fc9..042599cc691d 100644 --- 
a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2774,6 +2774,54 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache) +{ +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct rtable *rt = NULL; + struct flowi4 fl4; + __u8 tos; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = protocol; + fl4.daddr = info->key.u.ipv4.dst; + fl4.saddr = info->key.u.ipv4.src; + tos = info->key.tos; + fl4.flowi4_tos = RT_TOS(tos); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(ip_route_output_tunnel); + /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 087304427bbb..8a8c2d0cfcc8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -54,6 +54,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -1196,6 +1197,75 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); +/** + * ip6_dst_lookup_tunnel - perform route lookup on tunnel + * @skb: Packet for which lookup is done + * @dev: Tunnel device + * @net: Network namespace of tunnel device + * @sk: Socket which provides route info + * @saddr: Memory to store the src ip address + * @info: Tunnel information + * @protocol: IP protocol + * @use_cahce: Flag to enable cache usage + * This function performs a route lookup on a tunnel + * + * It returns a valid dst pointer and stores src address to be used in + * tunnel in param saddr on success, else a pointer encoded error code. 
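
[ Illustration, not part of the patch: a sketch of how a UDP tunnel transmit path might call the new IPv4 lookup helper added above; the IPv6 variant below follows the same pattern. The function name and the choice of IPPROTO_UDP are assumptions. ]

#include <linux/err.h>
#include <net/route.h>
#include <net/ip_tunnels.h>

static int example_tunnel_route_v4(struct sk_buff *skb, struct net_device *dev,
				   struct net *net,
				   const struct ip_tunnel_info *info)
{
	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
	struct rtable *rt;
	__be32 saddr;

	rt = ip_route_output_tunnel(skb, dev, net, &saddr, info,
				    IPPROTO_UDP, use_cache);
	if (IS_ERR(rt))
		return PTR_ERR(rt);	/* -ENETUNREACH or -ELOOP */

	/* ... build the outer IP/UDP header using rt and saddr ... */

	ip_rt_put(rt);
	return 0;
}
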
+ */ + +struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, + bool use_cache) +{ + struct dst_entry *dst = NULL; +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct flowi6 fl6; + __u8 prio; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, saddr); + if (dst) + return dst; + } +#endif + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = protocol; + fl6.daddr = info->key.u.ipv6.dst; + fl6.saddr = info->key.u.ipv6.src; + prio = info->key.tos; + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), + info->key.label); + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, + NULL); + if (IS_ERR(dst)) { + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (dst->dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); + dst_release(dst); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); +#endif + *saddr = fl6.saddr; + return dst; +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); + static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { -- cgit v1.2.3 From 13ef6ae8c0d96c00476ab9c81671e39a9f714e2b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Feb 2020 17:35:53 +0000 Subject: net: qrtr: fix spelling mistake "serivce" -> "service" There is a spelling mistake in a pr_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/qrtr/ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 67a4e59cdf4d..7bfde01f4e8a 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -150,7 +150,7 @@ static int service_announce_del(struct sockaddr_qrtr *dest, ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0) - pr_err("failed to announce del serivce\n"); + pr_err("failed to announce del service\n"); return ret; } -- cgit v1.2.3 From 3d9f773cf2876c01a505b9fe27270901d464e90a Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:43 +0100 Subject: bpf: Use bpf_prog_run_pin_on_cpu() at simple call sites. All of these cases are strictly of the form: preempt_disable(); BPF_PROG_RUN(...); preempt_enable(); Replace this with bpf_prog_run_pin_on_cpu() which wraps BPF_PROG_RUN() with: migrate_disable(); BPF_PROG_RUN(...); migrate_enable(); On non RT enabled kernels this maps to preempt_disable/enable() and on RT enabled kernels this solely prevents migration, which is sufficient as there is no requirement to prevent reentrancy to any BPF program from a preempting task. The only requirement is that the program stays on the same CPU. Therefore, this is a trivially correct transformation. The seccomp loop does not need protection over the loop. It only needs protection per BPF filter program [ tglx: Converted to bpf_prog_run_pin_on_cpu() ] Signed-off-by: David S. 
Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.691493094@linutronix.de --- include/linux/filter.h | 4 +--- kernel/seccomp.c | 4 +--- net/core/flow_dissector.c | 4 +--- net/core/skmsg.c | 8 ++------ net/kcm/kcmsock.c | 4 +--- 5 files changed, 6 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1982a52eb4c9..9270de2a0df8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -717,9 +717,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - preempt_disable(); - res = BPF_PROG_RUN(prog, skb); - preempt_enable(); + res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b6ea3dcb57bf..787041eb011b 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -268,16 +268,14 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - preempt_disable(); for (; f; f = f->prev) { - u32 cur_ret = BPF_PROG_RUN(f->prog, sd); + u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd); if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) { ret = cur_ret; *match = f; } } - preempt_enable(); return ret; } #endif /* CONFIG_SECCOMP_FILTER */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a1670dff0629..3eff84824c8b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -920,9 +920,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); flow_keys->flags = flags; - preempt_disable(); - result = BPF_PROG_RUN(prog, ctx); - preempt_enable(); + result = bpf_prog_run_pin_on_cpu(prog, ctx); flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, diff --git a/net/core/skmsg.c b/net/core/skmsg.c index eeb28cb85664..c479372f2cd2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -628,7 +628,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct bpf_prog *prog; int ret; - preempt_disable(); rcu_read_lock(); prog = READ_ONCE(psock->progs.msg_parser); if (unlikely(!prog)) { @@ -638,7 +637,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, sk_msg_compute_data_pointers(msg); msg->sk = sk; - ret = BPF_PROG_RUN(prog, msg); + ret = bpf_prog_run_pin_on_cpu(prog, msg); ret = sk_psock_map_verd(ret, msg->sk_redir); psock->apply_bytes = msg->apply_bytes; if (ret == __SK_REDIRECT) { @@ -653,7 +652,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, } out: rcu_read_unlock(); - preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(sk_psock_msg_verdict); @@ -665,9 +663,7 @@ static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog, skb->sk = psock->sk; bpf_compute_data_end_sk_skb(skb); - preempt_disable(); - ret = BPF_PROG_RUN(prog, skb); - preempt_enable(); + ret = bpf_prog_run_pin_on_cpu(prog, skb); /* strparser clones the skb before handing it to a upper layer, * meaning skb_orphan has been called. 
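
[ Note, not part of the patch: per the commit message, the helper used in these conversions is expected to behave roughly as sketched below -- migrate_disable() keeps the program on the current CPU and maps to preempt_disable() on non-RT kernels. This is a simplified sketch; the real definition lives in include/linux/filter.h and may differ in detail. ]

static inline u32 example_prog_run_pin_on_cpu(const struct bpf_prog *prog,
					      const void *ctx)
{
	u32 ret;

	migrate_disable();
	ret = BPF_PROG_RUN(prog, ctx);
	migrate_enable();
	return ret;
}
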
We NULL sk on the way out * to ensure we don't trigger a BUG_ON() in skb/sk operations diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index ea9e73428ed9..56fac24a627a 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -380,9 +380,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) struct bpf_prog *prog = psock->bpf_prog; int res; - preempt_disable(); - res = BPF_PROG_RUN(prog, skb); - preempt_enable(); + res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } -- cgit v1.2.3 From 6eac7795e8ef75de062c8f5bdb9520c9f0f065fa Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:44 +0100 Subject: bpf/tests: Use migrate disable instead of preempt disable Replace the preemption disable/enable with migrate_disable/enable() to reflect the actual requirement and to allow PREEMPT_RT to substitute it with an actual migration disable mechanism which does not disable preemption. [ tglx: Switched it over to migrate disable ] Signed-off-by: David S. Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.785306549@linutronix.de --- lib/test_bpf.c | 4 ++-- net/bpf/test_run.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index cecb230833be..a5fddf9ebcb7 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6660,14 +6660,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data, u64 start, finish; int ret = 0, i; - preempt_disable(); + migrate_disable(); start = ktime_get_ns(); for (i = 0; i < runs; i++) ret = BPF_PROG_RUN(fp, data); finish = ktime_get_ns(); - preempt_enable(); + migrate_enable(); *duration = finish - start; do_div(*duration, runs); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index d555c0d8657d..562443f94133 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -37,7 +37,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, repeat = 1; rcu_read_lock(); - preempt_disable(); + migrate_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { bpf_cgroup_storage_set(storage); @@ -54,18 +54,18 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, if (need_resched()) { time_spent += ktime_get_ns() - time_start; - preempt_enable(); + migrate_enable(); rcu_read_unlock(); cond_resched(); rcu_read_lock(); - preempt_disable(); + migrate_disable(); time_start = ktime_get_ns(); } } time_spent += ktime_get_ns() - time_start; - preempt_enable(); + migrate_enable(); rcu_read_unlock(); do_div(time_spent, repeat); -- cgit v1.2.3 From 2008495d81159a66de4dc3ee4252a5fc60294a82 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:18 +0100 Subject: flow_offload: pass action cookie through offload structures Extend struct flow_action_entry in order to hold TC action cookie specified by user inserting the action. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/net/flow_offload.h | 11 +++++++++++ net/core/flow_offload.c | 21 +++++++++++++++++++++ net/sched/cls_api.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index c6f7bd22db60..4e864c34a1b0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -156,6 +156,16 @@ enum flow_action_mangle_base { typedef void (*action_destr)(void *priv); +struct flow_action_cookie { + u32 cookie_len; + u8 cookie[]; +}; + +struct flow_action_cookie *flow_action_cookie_create(void *data, + unsigned int len, + gfp_t gfp); +void flow_action_cookie_destroy(struct flow_action_cookie *cookie); + struct flow_action_entry { enum flow_action_id id; action_destr destructor; @@ -214,6 +224,7 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; }; + struct flow_action_cookie *cookie; /* user defined action cookie */ }; struct flow_action { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 45b6a59ac124..d21348202ba6 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -167,6 +167,27 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_enc_opts); +struct flow_action_cookie *flow_action_cookie_create(void *data, + unsigned int len, + gfp_t gfp) +{ + struct flow_action_cookie *cookie; + + cookie = kmalloc(sizeof(*cookie) + len, gfp); + if (!cookie) + return NULL; + cookie->cookie_len = len; + memcpy(cookie->cookie, data, len); + return cookie; +} +EXPORT_SYMBOL(flow_action_cookie_create); + +void flow_action_cookie_destroy(struct flow_action_cookie *cookie) +{ + kfree(cookie); +} +EXPORT_SYMBOL(flow_action_cookie_destroy); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 13c33eaf1ca1..4e766c5ab77a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3382,14 +3382,40 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, } EXPORT_SYMBOL(tc_setup_cb_reoffload); +static int tcf_act_get_cookie(struct flow_action_entry *entry, + const struct tc_action *act) +{ + struct tc_cookie *cookie; + int err = 0; + + rcu_read_lock(); + cookie = rcu_dereference(act->act_cookie); + if (cookie) { + entry->cookie = flow_action_cookie_create(cookie->data, + cookie->len, + GFP_ATOMIC); + if (!entry->cookie) + err = -ENOMEM; + } + rcu_read_unlock(); + return err; +} + +static void tcf_act_put_cookie(struct flow_action_entry *entry) +{ + flow_action_cookie_destroy(entry->cookie); +} + void tc_cleanup_flow_action(struct flow_action *flow_action) { struct flow_action_entry *entry; int i; - flow_action_for_each(i, entry, flow_action) + flow_action_for_each(i, entry, flow_action) { + tcf_act_put_cookie(entry); if (entry->destructor) entry->destructor(entry->destructor_priv); + } } EXPORT_SYMBOL(tc_cleanup_flow_action); @@ -3447,6 +3473,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry = &flow_action->entries[j]; spin_lock_bh(&act->tcfa_lock); + err = tcf_act_get_cookie(entry, act); + if (err) + goto err_out_locked; if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { -- cgit v1.2.3 From 85b0589ede83d7b4aeb2bc3cb8910183876cd5ee Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:19 +0100 Subject: devlink: add trap metadata type for cookie Allow driver to indicate cookie 
metadata for registered traps. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 3 +++ 3 files changed, 6 insertions(+) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 07923e619206..014a8b3d1499 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -541,6 +541,7 @@ struct devlink_trap_group { }; #define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT BIT(0) +#define DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE BIT(1) /** * struct devlink_trap - Immutable packet trap attributes. diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae37fd4d194a..be2a2948f452 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -252,6 +252,8 @@ enum devlink_trap_type { enum { /* Trap can report input port as metadata */ DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT, + /* Trap can report flow action cookie as metadata */ + DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE, }; enum devlink_attr { diff --git a/net/core/devlink.c b/net/core/devlink.c index 0d7c5d3443d2..12e6ef749b8a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5540,6 +5540,9 @@ static int devlink_trap_metadata_put(struct sk_buff *msg, if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) && nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT)) goto nla_put_failure; + if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE)) + goto nla_put_failure; nla_nest_end(msg, attr); -- cgit v1.2.3 From 742b8cceaabc3ed09d4c4d527d9c6311c4925545 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:20 +0100 Subject: drop_monitor: extend by passing cookie from driver If driver passed along the cookie, push it through Netlink. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/drop_monitor.h | 3 +++ include/uapi/linux/net_dropmon.h | 1 + net/core/drop_monitor.c | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h index 2ab668461463..ddd441a60e03 100644 --- a/include/net/drop_monitor.h +++ b/include/net/drop_monitor.h @@ -6,17 +6,20 @@ #include #include #include +#include /** * struct net_dm_hw_metadata - Hardware-supplied packet metadata. * @trap_group_name: Hardware trap group name. * @trap_name: Hardware trap name. * @input_dev: Input netdevice. + * @fa_cookie: Flow action user cookie. 
*/ struct net_dm_hw_metadata { const char *trap_group_name; const char *trap_name; struct net_device *input_dev; + const struct flow_action_cookie *fa_cookie; }; #if IS_ENABLED(CONFIG_NET_DROP_MONITOR) diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 8bf79a9eb234..66048cc5d7b3 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -92,6 +92,7 @@ enum net_dm_attr { NET_DM_ATTR_HW_TRAP_COUNT, /* u32 */ NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ + NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 31700e0c3928..d58c1c45a895 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -700,6 +701,13 @@ static void net_dm_packet_work(struct work_struct *work) net_dm_packet_report(skb); } +static size_t +net_dm_flow_action_cookie_size(const struct net_dm_hw_metadata *hw_metadata) +{ + return hw_metadata->fa_cookie ? + nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0; +} + static size_t net_dm_hw_packet_report_size(size_t payload_len, const struct net_dm_hw_metadata *hw_metadata) @@ -717,6 +725,8 @@ net_dm_hw_packet_report_size(size_t payload_len, nla_total_size(strlen(hw_metadata->trap_name) + 1) + /* NET_DM_ATTR_IN_PORT */ net_dm_in_port_size() + + /* NET_DM_ATTR_FLOW_ACTION_COOKIE */ + net_dm_flow_action_cookie_size(hw_metadata) + /* NET_DM_ATTR_TIMESTAMP */ nla_total_size(sizeof(u64)) + /* NET_DM_ATTR_ORIG_LEN */ @@ -762,6 +772,12 @@ static int net_dm_hw_packet_report_fill(struct sk_buff *msg, goto nla_put_failure; } + if (hw_metadata->fa_cookie && + nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE, + hw_metadata->fa_cookie->cookie_len, + hw_metadata->fa_cookie->cookie)) + goto nla_put_failure; + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) goto nla_put_failure; @@ -794,11 +810,12 @@ nla_put_failure: static struct net_dm_hw_metadata * net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) { + const struct flow_action_cookie *fa_cookie; struct net_dm_hw_metadata *n_hw_metadata; const char *trap_group_name; const char *trap_name; - n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC); + n_hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC); if (!n_hw_metadata) return NULL; @@ -812,12 +829,25 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) goto free_trap_group; n_hw_metadata->trap_name = trap_name; + if (hw_metadata->fa_cookie) { + size_t cookie_size = sizeof(*fa_cookie) + + hw_metadata->fa_cookie->cookie_len; + + fa_cookie = kmemdup(hw_metadata->fa_cookie, cookie_size, + GFP_ATOMIC); + if (!fa_cookie) + goto free_trap_name; + n_hw_metadata->fa_cookie = fa_cookie; + } + n_hw_metadata->input_dev = hw_metadata->input_dev; if (n_hw_metadata->input_dev) dev_hold(n_hw_metadata->input_dev); return n_hw_metadata; +free_trap_name: + kfree(trap_name); free_trap_group: kfree(trap_group_name); free_hw_metadata: @@ -830,6 +860,7 @@ net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata) { if (hw_metadata->input_dev) dev_put(hw_metadata->input_dev); + kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); kfree(hw_metadata); -- cgit v1.2.3 From 5a2e106c746d2740f425cce3ac039321c924dc85 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:21 
+0100 Subject: devlink: extend devlink_trap_report() to accept cookie and pass Add cookie argument to devlink_trap_report() allowing driver to pass in the user cookie. Pass on the cookie down to drop monitor code. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 4 ++-- drivers/net/netdevsim/dev.c | 2 +- include/net/devlink.h | 7 ++++--- net/core/devlink.c | 11 ++++++++--- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 04f2445f6d43..a55577a50e90 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -71,7 +71,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, local_port); skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); consume_skb(skb); } @@ -95,7 +95,7 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, local_port); skb_push(skb, ETH_HLEN); - devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL); skb_pull(skb, ETH_HLEN); skb->offload_fwd_mark = 1; netif_receive_skb(skb); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d7706a0346f2..aa17533c06e1 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -385,7 +385,7 @@ static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port) */ local_bh_disable(); devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx, - &nsim_dev_port->devlink_port); + &nsim_dev_port->devlink_port, NULL); local_bh_enable(); consume_skb(skb); } diff --git a/include/net/devlink.h b/include/net/devlink.h index 014a8b3d1499..c9ca86b054bc 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -16,6 +16,7 @@ #include #include #include +#include #include struct devlink_ops; @@ -1050,9 +1051,9 @@ int devlink_traps_register(struct devlink *devlink, void devlink_traps_unregister(struct devlink *devlink, const struct devlink_trap *traps, size_t traps_count); -void devlink_trap_report(struct devlink *devlink, - struct sk_buff *skb, void *trap_ctx, - struct devlink_port *in_devlink_port); +void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, + void *trap_ctx, struct devlink_port *in_devlink_port, + const struct flow_action_cookie *fa_cookie); void *devlink_trap_ctx_priv(void *trap_ctx); #if IS_ENABLED(CONFIG_NET_DEVLINK) diff --git a/net/core/devlink.c b/net/core/devlink.c index 12e6ef749b8a..49706031ab45 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8205,12 +8205,14 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, static void devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata, const struct devlink_trap_item *trap_item, - struct devlink_port *in_devlink_port) + struct devlink_port *in_devlink_port, + const struct flow_action_cookie *fa_cookie) { struct devlink_trap_group_item *group_item = trap_item->group_item; hw_metadata->trap_group_name = group_item->group->name; hw_metadata->trap_name = trap_item->trap->name; + 
hw_metadata->fa_cookie = fa_cookie; spin_lock(&in_devlink_port->type_lock); if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH) @@ -8224,9 +8226,12 @@ devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata, * @skb: Trapped packet. * @trap_ctx: Trap context. * @in_devlink_port: Input devlink port. + * @fa_cookie: Flow action cookie. Could be NULL. */ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, - void *trap_ctx, struct devlink_port *in_devlink_port) + void *trap_ctx, struct devlink_port *in_devlink_port, + const struct flow_action_cookie *fa_cookie) + { struct devlink_trap_item *trap_item = trap_ctx; struct net_dm_hw_metadata hw_metadata = {}; @@ -8235,7 +8240,7 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, devlink_trap_stats_update(trap_item->group_item->stats, skb->len); devlink_trap_report_metadata_fill(&hw_metadata, trap_item, - in_devlink_port); + in_devlink_port, fa_cookie); net_dm_hw_report(skb, &hw_metadata); } EXPORT_SYMBOL_GPL(devlink_trap_report); -- cgit v1.2.3 From e6dee9f3893c823dff9c7f33fe0a598ee25c78f7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:17 +0100 Subject: net-sysfs: add netdev_change_owner() Add a function to change the owner of a network device when it is moved between network namespaces. Currently, when moving network devices between network namespaces the ownership of the corresponding sysfs entries is not changed. This leads to problems when tools try to operate on the corresponding sysfs files. This leads to a bug whereby a network device that is created in a network namespaces owned by a user namespace will have its corresponding sysfs entry owned by the root user of the corresponding user namespace. If such a network device has to be moved back to the host network namespace the permissions will still be set to the user namespaces. This means unprivileged users can e.g. trigger uevents for such incorrectly owned devices. They can also modify the settings of the device itself. Both of these things are unwanted. For example, workloads will create network devices in the host network namespace. Other tools will then proceed to move such devices between network namespaces owner by other user namespaces. While the ownership of the device itself is updated in net/core/net-sysfs.c:dev_change_net_namespace() the corresponding sysfs entry for the device is not: drwxr-xr-x 5 nobody nobody 0 Jan 25 18:08 . drwxr-xr-x 9 nobody nobody 0 Jan 25 18:08 .. 
-r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 addr_assign_type -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 addr_len -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 address -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 broadcast -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 carrier -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 carrier_changes -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 carrier_down_count -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 carrier_up_count -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 dev_id -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 dev_port -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 dormant -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 duplex -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 flags -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 gro_flush_timeout -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 ifalias -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 ifindex -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 iflink -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 link_mode -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 mtu -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 name_assign_type -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 netdev_group -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 operstate -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 phys_port_id -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 phys_port_name -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 phys_switch_id drwxr-xr-x 2 nobody nobody 0 Jan 25 18:09 power -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 proto_down drwxr-xr-x 4 nobody nobody 0 Jan 25 18:09 queues -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 speed drwxr-xr-x 2 nobody nobody 0 Jan 25 18:09 statistics lrwxrwxrwx 1 nobody nobody 0 Jan 25 18:08 subsystem -> ../../../../class/net -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:09 tx_queue_len -r--r--r-- 1 nobody nobody 4096 Jan 25 18:09 type -rw-r--r-- 1 nobody nobody 4096 Jan 25 18:08 uevent However, if a device is created directly in the network namespace then the device's sysfs permissions will be correctly updated: drwxr-xr-x 5 root root 0 Jan 25 18:12 . drwxr-xr-x 9 nobody nobody 0 Jan 25 18:08 .. 
-r--r--r-- 1 root root 4096 Jan 25 18:12 addr_assign_type -r--r--r-- 1 root root 4096 Jan 25 18:12 addr_len -r--r--r-- 1 root root 4096 Jan 25 18:12 address -r--r--r-- 1 root root 4096 Jan 25 18:12 broadcast -rw-r--r-- 1 root root 4096 Jan 25 18:12 carrier -r--r--r-- 1 root root 4096 Jan 25 18:12 carrier_changes -r--r--r-- 1 root root 4096 Jan 25 18:12 carrier_down_count -r--r--r-- 1 root root 4096 Jan 25 18:12 carrier_up_count -r--r--r-- 1 root root 4096 Jan 25 18:12 dev_id -r--r--r-- 1 root root 4096 Jan 25 18:12 dev_port -r--r--r-- 1 root root 4096 Jan 25 18:12 dormant -r--r--r-- 1 root root 4096 Jan 25 18:12 duplex -rw-r--r-- 1 root root 4096 Jan 25 18:12 flags -rw-r--r-- 1 root root 4096 Jan 25 18:12 gro_flush_timeout -rw-r--r-- 1 root root 4096 Jan 25 18:12 ifalias -r--r--r-- 1 root root 4096 Jan 25 18:12 ifindex -r--r--r-- 1 root root 4096 Jan 25 18:12 iflink -r--r--r-- 1 root root 4096 Jan 25 18:12 link_mode -rw-r--r-- 1 root root 4096 Jan 25 18:12 mtu -r--r--r-- 1 root root 4096 Jan 25 18:12 name_assign_type -rw-r--r-- 1 root root 4096 Jan 25 18:12 netdev_group -r--r--r-- 1 root root 4096 Jan 25 18:12 operstate -r--r--r-- 1 root root 4096 Jan 25 18:12 phys_port_id -r--r--r-- 1 root root 4096 Jan 25 18:12 phys_port_name -r--r--r-- 1 root root 4096 Jan 25 18:12 phys_switch_id drwxr-xr-x 2 root root 0 Jan 25 18:12 power -rw-r--r-- 1 root root 4096 Jan 25 18:12 proto_down drwxr-xr-x 4 root root 0 Jan 25 18:12 queues -r--r--r-- 1 root root 4096 Jan 25 18:12 speed drwxr-xr-x 2 root root 0 Jan 25 18:12 statistics lrwxrwxrwx 1 nobody nobody 0 Jan 25 18:12 subsystem -> ../../../../class/net -rw-r--r-- 1 root root 4096 Jan 25 18:12 tx_queue_len -r--r--r-- 1 root root 4096 Jan 25 18:12 type -rw-r--r-- 1 root root 4096 Jan 25 18:12 uevent Now, when creating a network device in a network namespace owned by a user namespace and moving it to the host the permissions will be set to the id that the user namespace root user has been mapped to on the host leading to all sorts of permission issues: 458752 drwxr-xr-x 5 458752 458752 0 Jan 25 18:12 . drwxr-xr-x 9 root root 0 Jan 25 18:08 .. 
-r--r--r-- 1 458752 458752 4096 Jan 25 18:12 addr_assign_type -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 addr_len -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 address -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 broadcast -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 carrier -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 carrier_changes -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 carrier_down_count -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 carrier_up_count -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 dev_id -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 dev_port -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 dormant -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 duplex -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 flags -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 gro_flush_timeout -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 ifalias -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 ifindex -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 iflink -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 link_mode -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 mtu -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 name_assign_type -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 netdev_group -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 operstate -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 phys_port_id -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 phys_port_name -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 phys_switch_id drwxr-xr-x 2 458752 458752 0 Jan 25 18:12 power -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 proto_down drwxr-xr-x 4 458752 458752 0 Jan 25 18:12 queues -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 speed drwxr-xr-x 2 458752 458752 0 Jan 25 18:12 statistics lrwxrwxrwx 1 root root 0 Jan 25 18:12 subsystem -> ../../../../class/net -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 tx_queue_len -r--r--r-- 1 458752 458752 4096 Jan 25 18:12 type -rw-r--r-- 1 458752 458752 4096 Jan 25 18:12 uevent Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 27 +++++++++++++++++++++++++++ net/core/net-sysfs.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4c826b8bf9b1..e19967665cb0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1767,6 +1767,33 @@ int netdev_register_kobject(struct net_device *ndev) return error; } +/* Change owner for sysfs entries when moving network devices across network + * namespaces owned by different user namespaces. + */ +int netdev_change_owner(struct net_device *ndev, const struct net *net_old, + const struct net *net_new) +{ + struct device *dev = &ndev->dev; + kuid_t old_uid, new_uid; + kgid_t old_gid, new_gid; + int error; + + net_ns_get_ownership(net_old, &old_uid, &old_gid); + net_ns_get_ownership(net_new, &new_uid, &new_gid); + + /* The network namespace was changed but the owning user namespace is + * identical so there's no need to change the owner of sysfs entries. 
+ */ + if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid)) + return 0; + + error = device_change_owner(dev, new_uid, new_gid); + if (error) + return error; + + return 0; +} + int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns) { diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 006876c7b78d..8a5b04c2699a 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -8,5 +8,7 @@ void netdev_unregister_kobject(struct net_device *); int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num); +int netdev_change_owner(struct net_device *, const struct net *net_old, + const struct net *net_new); #endif -- cgit v1.2.3 From d755407d4444c3e0fbd7d7c3aa666d595e9ab217 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:18 +0100 Subject: net-sysfs: add queue_change_owner() Add a function to change the owner of the queue entries for a network device when it is moved between network namespaces. Currently, when moving network devices between network namespaces the ownership of the corresponding queue sysfs entries are not changed. This leads to problems when tools try to operate on the corresponding sysfs files. Fix this. Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e19967665cb0..cf0215734ceb 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -944,6 +944,24 @@ err: kobject_put(kobj); return error; } + +static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid, + kgid_t kgid) +{ + struct netdev_rx_queue *queue = dev->_rx + index; + struct kobject *kobj = &queue->kobj; + int error; + + error = sysfs_change_owner(kobj, kuid, kgid); + if (error) + return error; + + if (dev->sysfs_rx_queue_group) + error = sysfs_group_change_owner( + kobj, dev->sysfs_rx_queue_group, kuid, kgid); + + return error; +} #endif /* CONFIG_SYSFS */ int @@ -981,6 +999,29 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) #endif } +static int net_rx_queue_change_owner(struct net_device *dev, int num, + kuid_t kuid, kgid_t kgid) +{ +#ifdef CONFIG_SYSFS + int error = 0; + int i; + +#ifndef CONFIG_RPS + if (!dev->sysfs_rx_queue_group) + return 0; +#endif + for (i = 0; i < num; i++) { + error = rx_queue_change_owner(dev, i, kuid, kgid); + if (error) + break; + } + + return error; +#else + return 0; +#endif +} + #ifdef CONFIG_SYSFS /* * netdev_queue sysfs structures and functions. 
@@ -1486,6 +1527,23 @@ err: kobject_put(kobj); return error; } + +static int tx_queue_change_owner(struct net_device *ndev, int index, + kuid_t kuid, kgid_t kgid) +{ + struct netdev_queue *queue = ndev->_tx + index; + struct kobject *kobj = &queue->kobj; + int error; + + error = sysfs_change_owner(kobj, kuid, kgid); + if (error) + return error; + +#ifdef CONFIG_BQL + error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid); +#endif + return error; +} #endif /* CONFIG_SYSFS */ int @@ -1520,6 +1578,25 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) #endif /* CONFIG_SYSFS */ } +static int net_tx_queue_change_owner(struct net_device *dev, int num, + kuid_t kuid, kgid_t kgid) +{ +#ifdef CONFIG_SYSFS + int error = 0; + int i; + + for (i = 0; i < num; i++) { + error = tx_queue_change_owner(dev, i, kuid, kgid); + if (error) + break; + } + + return error; +#else + return 0; +#endif /* CONFIG_SYSFS */ +} + static int register_queue_kobjects(struct net_device *dev) { int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; @@ -1554,6 +1631,31 @@ error: return error; } +static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid) +{ + int error = 0, real_rx = 0, real_tx = 0; + +#ifdef CONFIG_SYSFS + if (ndev->queues_kset) { + error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid); + if (error) + return error; + } + real_rx = ndev->real_num_rx_queues; +#endif + real_tx = ndev->real_num_tx_queues; + + error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid); + if (error) + return error; + + error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid); + if (error) + return error; + + return 0; +} + static void remove_queue_kobjects(struct net_device *dev) { int real_rx = 0, real_tx = 0; @@ -1791,6 +1893,10 @@ int netdev_change_owner(struct net_device *ndev, const struct net *net_old, if (error) return error; + error = queue_change_owner(ndev, new_uid, new_gid); + if (error) + return error; + return 0; } -- cgit v1.2.3 From ef6a4c88e9e11bc32cd02b052d04745af9691412 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:19 +0100 Subject: net: fix sysfs permssions when device changes network namespace Now that we moved all the helpers in place and make use netdev_change_owner() to fixup the permissions when moving network devices between network namespaces. Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4770dde3448d..dbbfff123196 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10003,6 +10003,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { + struct net *net_old = dev_net(dev); int err, new_nsid, new_ifindex; ASSERT_RTNL(); @@ -10018,7 +10019,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Get out if there is nothing todo */ err = 0; - if (net_eq(dev_net(dev), net)) + if (net_eq(net_old, net)) goto out; /* Pick the destination device name, and ensure @@ -10094,6 +10095,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char err = device_rename(&dev->dev, dev->name); WARN_ON(err); + /* Adapt owner in case owning user namespace of target network + * namespace is different from the original one. 
+ */ + err = netdev_change_owner(dev, net_old, net); + WARN_ON(err); + /* Add the device back in the hashes */ list_netdevice(dev); -- cgit v1.2.3 From 0b7f41f68710ccbf7d029c749616e5d26ae8f74d Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Tue, 25 Feb 2020 12:38:54 -0800 Subject: tcp-zerocopy: Update returned getsockopt() optlen. TCP receive zerocopy currently does not update the returned optlen for getsockopt() if the user passed in a larger than expected value. Thus, userspace cannot properly determine if all the fields are set in the passed-in struct. This patch sets the optlen for this case before returning, in keeping with the expected operation of getsockopt(). Fixes: c8856c051454 ("tcp-zerocopy: Return inq along with tcp receive zerocopy.") Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1b685485a5b5..48aa457a9516 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3669,8 +3669,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; if (len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; - if (len > sizeof(zc)) + if (len > sizeof(zc)) { len = sizeof(zc); + if (put_user(len, optlen)) + return -EFAULT; + } if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); -- cgit v1.2.3 From 366bb249b58381bc415a83608859019736b23766 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Tue, 25 Feb 2020 22:41:21 +0100 Subject: net/smc: rework peer ID handling This patch initializes the peer ID to a random instance ID and a zero MAC address. If a RoCE device is in the host, the MAC address part of the peer ID is overwritten with the respective address. Also, a function for checking if the peer ID is valid is added. A peer ID is considered valid if the MAC address part contains a non-zero MAC address. Signed-off-by: Hans Wippel Signed-off-by: David S. 
Miller --- net/smc/smc_ib.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 6756bd5a3fe4..e0592d337a94 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -37,11 +37,7 @@ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ .list = LIST_HEAD_INIT(smc_ib_devices.list), }; -#define SMC_LOCAL_SYSTEMID_RESET "%%%%%%%" - -u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system - * identifier - */ +u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ static int smc_ib_modify_qp_init(struct smc_link *lnk) { @@ -168,6 +164,15 @@ static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, { memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], sizeof(smcibdev->mac[ibport - 1])); +} + +static bool smc_ib_is_valid_local_systemid(void) +{ + return !is_zero_ether_addr(&local_systemid[2]); +} + +static void smc_ib_init_local_systemid(void) +{ get_random_bytes(&local_systemid[0], 2); } @@ -224,8 +229,7 @@ static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) rc = smc_ib_fill_mac(smcibdev, ibport); if (rc) goto out; - if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, - sizeof(local_systemid)) && + if (!smc_ib_is_valid_local_systemid() && smc_ib_port_active(smcibdev, ibport)) /* create unique system identifier */ smc_ib_define_local_systemid(smcibdev, ibport); @@ -605,6 +609,7 @@ static struct ib_client smc_ib_client = { int __init smc_ib_register_client(void) { + smc_ib_init_local_systemid(); return ib_register_client(&smc_ib_client); } -- cgit v1.2.3 From a082ec897ffed754aae39a9665af161f1a6fa44b Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Tue, 25 Feb 2020 22:41:22 +0100 Subject: net/smc: improve peer ID in CLC decline for SMC-R According to RFC 7609, all CLC messages contain a peer ID that consists of a unique instance ID and the MAC address of one of the host's RoCE devices. But if a SMC-R connection cannot be established, e.g., because no matching pnet table entry is found, the current implementation uses a zero value in the CLC decline message although the host's peer ID is set to a proper value. If no RoCE and no ISM device is usable for a connection, there is no LGR and the LGR check in smc_clc_send_decline() prevents that the peer ID is copied into the CLC decline message for both SMC-D and SMC-R. So, this patch modifies the check to also accept the case of no LGR. Also, only a valid peer ID is copied into the decline message. Signed-off-by: Hans Wippel Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 3 ++- net/smc/smc_ib.c | 2 +- net/smc/smc_ib.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 3e16b887cfcf..ea0068f0173c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -372,7 +372,8 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = SMC_CLC_V1; dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 
1 : 0; - if (smc->conn.lgr && !smc->conn.lgr->is_smcd) + if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && + smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e0592d337a94..3444de27fecd 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -166,7 +166,7 @@ static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, sizeof(smcibdev->mac[ibport - 1])); } -static bool smc_ib_is_valid_local_systemid(void) +bool smc_ib_is_valid_local_systemid(void) { return !is_zero_ether_addr(&local_systemid[2]); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 255db87547d3..5c2b115d36da 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -84,4 +84,5 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, enum dma_data_direction data_direction); int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index); +bool smc_ib_is_valid_local_systemid(void); #endif -- cgit v1.2.3 From c535f9203209be19b60421b363b0bb2c7e90f298 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 25 Feb 2020 21:08:52 -0800 Subject: af_llc: fix if-statement empty body warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When debugging via dprintk() is not enabled, make the dprintk() macro be an empty do-while loop, as is done in . This fixes a gcc warning when -Wextra is set: ../net/llc/af_llc.c:974:51: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] I have verified that there is not object code change (with gcc 7.5.0). Signed-off-by: Randy Dunlap Cc: netdev@vger.kernel.org Cc: "David S. Miller" Signed-off-by: David S. Miller --- net/llc/af_llc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2922d4150d88..54fb8d452a7b 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -47,7 +47,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 #define dprintk(args...) printk(KERN_DEBUG args) #else -#define dprintk(args...) +#define dprintk(args...) do {} while (0) #endif /* Maybe we'll add some more in the future. */ -- cgit v1.2.3 From 101f6f851ee6268a46e2dcec244a812fd9a9195a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Feb 2020 10:14:46 +0100 Subject: mptcp: add and use mptcp_data_ready helper allows us to schedule the work queue to drain the ssk receive queue in a followup patch. This is needed to avoid sending all-to-pessimistic mptcp-level acknowledgements. At this time, the ack_seq is what was last read by userspace instead of the highest in-sequence number queued for reading. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 8 ++++++++ net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 14 ++++---------- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e9aa6807b5be..1d55563e9aca 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -111,6 +111,14 @@ static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk) return NULL; } +void mptcp_data_ready(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + set_bit(MPTCP_DATA_READY, &msk->flags); + sk->sk_data_ready(sk); +} + static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) { if (!msk->cached_ext) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9f8663b30456..67895a7c1e5b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -201,6 +201,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); void mptcp_finish_connect(struct sock *sk); +void mptcp_data_ready(struct sock *sk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 65122edf60aa..3dad662840aa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -554,11 +554,8 @@ static void subflow_data_ready(struct sock *sk) return; } - if (mptcp_subflow_data_available(sk)) { - set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags); - - parent->sk_data_ready(parent); - } + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent); } static void subflow_write_space(struct sock *sk) @@ -690,11 +687,8 @@ static void subflow_state_change(struct sock *sk) * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. */ - if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) { - set_bit(MPTCP_DATA_READY, &mptcp_sk(parent)->flags); - - parent->sk_data_ready(parent); - } + if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent); if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { -- cgit v1.2.3 From 80992017150b4e37fe01c0aa2e3c9d02de66c11e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 26 Feb 2020 10:14:47 +0100 Subject: mptcp: add work queue skeleton Will be extended with functionality in followup patches. Initial user is moving skbs from subflows receive queue to the mptcp-level receive queue. Signed-off-by: Paolo Abeni Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 22 ++++++++++++++++++++++ net/mptcp/protocol.h | 1 + 2 files changed, 23 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1d55563e9aca..cbf184a71ed7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -551,12 +551,24 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } } +static void mptcp_worker(struct work_struct *work) +{ + struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); + struct sock *sk = &msk->sk.icsk_inet.sk; + + lock_sock(sk); + + release_sock(sk); + sock_put(sk); +} + static int __mptcp_init_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); INIT_LIST_HEAD(&msk->conn_list); __set_bit(MPTCP_SEND_SPACE, &msk->flags); + INIT_WORK(&msk->work, mptcp_worker); msk->first = NULL; @@ -571,6 +583,14 @@ static int mptcp_init_sock(struct sock *sk) return __mptcp_init_sock(sk); } +static void mptcp_cancel_work(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (cancel_work_sync(&msk->work)) + sock_put(sk); +} + static void mptcp_subflow_shutdown(struct sock *ssk, int how) { lock_sock(ssk); @@ -616,6 +636,8 @@ static void mptcp_close(struct sock *sk, long timeout) __mptcp_close_ssk(sk, ssk, subflow, timeout); } + mptcp_cancel_work(sk); + sk_common_release(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 67895a7c1e5b..6e6e162d25f1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -70,6 +70,7 @@ struct mptcp_sock { u32 token; unsigned long flags; bool can_ack; + struct work_struct work; struct list_head conn_list; struct skb_ext *cached_ext; /* for the next sendmsg */ struct socket *subflow; /* outgoing connect/listener/!mp_capable */ -- cgit v1.2.3 From 6771bfd9ee2460c13e38c0cd46a3afb5404ae716 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Feb 2020 10:14:48 +0100 Subject: mptcp: update mptcp ack sequence from work queue If userspace is not reading data, all the mptcp-level acks contain the ack_seq from the last time userspace read data rather than the most recent in-sequence value. This causes pointless retransmissions for data that is already queued. The reason for this is that all the mptcp protocol level processing happens at mptcp_recv time. This adds work queue to move skbs from the subflow sockets receive queue on the mptcp socket receive queue (which was not used so far). This allows us to announce the correct mptcp ack sequence in a timely fashion, even when the application does not call recv() on the mptcp socket for some time. We still wake userspace tasks waiting for POLLIN immediately: If the mptcp level receive queue is empty (because the work queue is still pending) it can be filled from in-sequence subflow sockets at recv time without a need to wait for the worker. The skb_orphan when moving skbs from subflow to mptcp level is needed, because the destructor (sock_rfree) relies on skb->sk (ssk!) lock being taken. A followup patch will add needed rmem accouting for the moved skbs. Other problem: In case application behaves as expected, and calls recv() as soon as mptcp socket becomes readable, the work queue will only waste cpu cycles. This will also be addressed in followup patches. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 234 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 165 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cbf184a71ed7..b4a8517d8eac 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -31,6 +31,12 @@ struct mptcp6_sock { }; #endif +struct mptcp_skb_cb { + u32 offset; +}; + +#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) + /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not * completed yet or has failed, return the subflow socket. * Otherwise return NULL. @@ -111,11 +117,88 @@ static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk) return NULL; } +static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, + struct sk_buff *skb, + unsigned int offset, size_t copy_len) +{ + struct sock *sk = (struct sock *)msk; + + __skb_unlink(skb, &ssk->sk_receive_queue); + skb_orphan(skb); + __skb_queue_tail(&sk->sk_receive_queue, skb); + + msk->ack_seq += copy_len; + MPTCP_SKB_CB(skb)->offset = offset; +} + +static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, + struct sock *ssk, + unsigned int *bytes) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + unsigned int moved = 0; + bool more_data_avail; + struct tcp_sock *tp; + bool done = false; + + tp = tcp_sk(ssk); + do { + u32 map_remaining, offset; + u32 seq = tp->copied_seq; + struct sk_buff *skb; + bool fin; + + /* try to move as much data as available */ + map_remaining = subflow->map_data_len - + mptcp_subflow_get_map_offset(subflow); + + skb = skb_peek(&ssk->sk_receive_queue); + if (!skb) + break; + + offset = seq - TCP_SKB_CB(skb)->seq; + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; + if (fin) { + done = true; + seq++; + } + + if (offset < skb->len) { + size_t len = skb->len - offset; + + if (tp->urg_data) + done = true; + + __mptcp_move_skb(msk, ssk, skb, offset, len); + seq += len; + moved += len; + + if (WARN_ON_ONCE(map_remaining < len)) + break; + } else { + WARN_ON_ONCE(!fin); + sk_eat_skb(ssk, skb); + done = true; + } + + WRITE_ONCE(tp->copied_seq, seq); + more_data_avail = mptcp_subflow_data_available(ssk); + } while (more_data_avail); + + *bytes = moved; + + return done; +} + void mptcp_data_ready(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); set_bit(MPTCP_DATA_READY, &msk->flags); + + if (schedule_work(&msk->work)) + sock_hold((struct sock *)msk); + sk->sk_data_ready(sk); } @@ -373,19 +456,68 @@ static void mptcp_wait_data(struct sock *sk, long *timeo) remove_wait_queue(sk_sleep(sk), &wait); } +static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, + struct msghdr *msg, + size_t len) +{ + struct sock *sk = (struct sock *)msk; + struct sk_buff *skb; + int copied = 0; + + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { + u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 data_len = skb->len - offset; + u32 count = min_t(size_t, len - copied, data_len); + int err; + + err = skb_copy_datagram_msg(skb, offset, msg, count); + if (unlikely(err < 0)) { + if (!copied) + return err; + break; + } + + copied += count; + + if (count < data_len) { + MPTCP_SKB_CB(skb)->offset += count; + break; + } + + __skb_unlink(skb, &sk->sk_receive_queue); + __kfree_skb(skb); + + if (copied >= len) + break; + } + + return copied; +} + +static bool __mptcp_move_skbs(struct mptcp_sock *msk) +{ + unsigned int moved = 0; + bool done; + + do { + struct sock *ssk = mptcp_subflow_recv_lookup(msk); + + if (!ssk) + break; 
+ + lock_sock(ssk); + done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + release_sock(ssk); + } while (!done); + + return moved > 0; +} + static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - struct mptcp_subflow_context *subflow; - bool more_data_avail = false; - struct mptcp_read_arg arg; - read_descriptor_t desc; - bool wait_data = false; struct socket *ssock; - struct tcp_sock *tp; - bool done = false; - struct sock *ssk; int copied = 0; int target; long timeo; @@ -403,65 +535,26 @@ fallback: return copied; } - arg.msg = msg; - desc.arg.data = &arg; - desc.error = 0; - timeo = sock_rcvtimeo(sk, nonblock); len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - while (!done) { - u32 map_remaining; + while (len > (size_t)copied) { int bytes_read; - ssk = mptcp_subflow_recv_lookup(msk); - pr_debug("msk=%p ssk=%p", msk, ssk); - if (!ssk) - goto wait_for_data; + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); + if (unlikely(bytes_read < 0)) { + if (!copied) + copied = bytes_read; + goto out_err; + } - subflow = mptcp_subflow_ctx(ssk); - tp = tcp_sk(ssk); + copied += bytes_read; - lock_sock(ssk); - do { - /* try to read as much data as available */ - map_remaining = subflow->map_data_len - - mptcp_subflow_get_map_offset(subflow); - desc.count = min_t(size_t, len - copied, map_remaining); - pr_debug("reading %zu bytes, copied %d", desc.count, - copied); - bytes_read = tcp_read_sock(ssk, &desc, - mptcp_read_actor); - if (bytes_read < 0) { - if (!copied) - copied = bytes_read; - done = true; - goto next; - } - - pr_debug("msk ack_seq=%llx -> %llx", msk->ack_seq, - msk->ack_seq + bytes_read); - msk->ack_seq += bytes_read; - copied += bytes_read; - if (copied >= len) { - done = true; - goto next; - } - if (tp->urg_data && tp->urg_seq == tp->copied_seq) { - pr_err("Urgent data present, cannot proceed"); - done = true; - goto next; - } -next: - more_data_avail = mptcp_subflow_data_available(ssk); - } while (more_data_avail && !done); - release_sock(ssk); - continue; - -wait_for_data: - more_data_avail = false; + if (skb_queue_empty(&sk->sk_receive_queue) && + __mptcp_move_skbs(msk)) + continue; /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() @@ -502,26 +595,25 @@ wait_for_data: } pr_debug("block timeout %ld", timeo); - wait_data = true; mptcp_wait_data(sk, &timeo); if (unlikely(__mptcp_tcp_fallback(msk))) goto fallback; } - if (more_data_avail) { - if (!test_bit(MPTCP_DATA_READY, &msk->flags)) - set_bit(MPTCP_DATA_READY, &msk->flags); - } else if (!wait_data) { + if (skb_queue_empty(&sk->sk_receive_queue)) { + /* entire backlog drained, clear DATA_READY. */ clear_bit(MPTCP_DATA_READY, &msk->flags); - /* .. race-breaker: ssk might get new data after last - * data_available() returns false. + /* .. race-breaker: ssk might have gotten new data + * after last __mptcp_move_skbs() returned false. 
*/ - ssk = mptcp_subflow_recv_lookup(msk); - if (unlikely(ssk)) + if (unlikely(__mptcp_move_skbs(msk))) set_bit(MPTCP_DATA_READY, &msk->flags); + } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { + /* data to read but mptcp_wait_data() cleared DATA_READY */ + set_bit(MPTCP_DATA_READY, &msk->flags); } - +out_err: release_sock(sk); return copied; } @@ -557,7 +649,7 @@ static void mptcp_worker(struct work_struct *work) struct sock *sk = &msk->sk.icsk_inet.sk; lock_sock(sk); - + __mptcp_move_skbs(msk); release_sock(sk); sock_put(sk); } @@ -638,6 +730,8 @@ static void mptcp_close(struct sock *sk, long timeout) mptcp_cancel_work(sk); + __skb_queue_purge(&sk->sk_receive_queue); + sk_common_release(sk); } @@ -1204,6 +1298,8 @@ void mptcp_proto_init(void) panic("Failed to register MPTCP proto.\n"); inet_register_protosw(&mptcp_protosw); + + BUILD_BUG_ON(sizeof(struct mptcp_skb_cb) > sizeof_field(struct sk_buff, cb)); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) -- cgit v1.2.3 From 600911ff5f72baeac3fc6a9532ec8ba949cd818a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Feb 2020 10:14:49 +0100 Subject: mptcp: add rmem queue accounting If userspace never drains the receive buffers we must stop draining the subflow socket(s) at some point. This adds the needed rmem accouting for this. If the threshold is reached, we stop draining the subflows. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b4a8517d8eac..381d5647a95b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -124,7 +124,7 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, struct sock *sk = (struct sock *)msk; __skb_unlink(skb, &ssk->sk_receive_queue); - skb_orphan(skb); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); msk->ack_seq += copy_len; @@ -136,10 +136,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, unsigned int *bytes) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = (struct sock *)msk; unsigned int moved = 0; bool more_data_avail; struct tcp_sock *tp; bool done = false; + int rcvbuf; + + rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); + if (rcvbuf > sk->sk_rcvbuf) + sk->sk_rcvbuf = rcvbuf; tp = tcp_sk(ssk); do { @@ -183,6 +189,11 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, WRITE_ONCE(tp->copied_seq, seq); more_data_avail = mptcp_subflow_data_available(ssk); + + if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) { + done = true; + break; + } } while (more_data_avail); *bytes = moved; @@ -196,9 +207,14 @@ void mptcp_data_ready(struct sock *sk) set_bit(MPTCP_DATA_READY, &msk->flags); + /* don't schedule if mptcp sk is (still) over limit */ + if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) + goto wake; + if (schedule_work(&msk->work)) sock_hold((struct sock *)msk); +wake: sk->sk_data_ready(sk); } -- cgit v1.2.3 From bfae9dae449d5eeb196a071fe4720504362672b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Feb 2020 10:14:50 +0100 Subject: mptcp: remove mptcp_read_actor Only used to discard stale data from the subflow, so move it where needed. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 27 --------------------------- net/mptcp/protocol.h | 7 ------- net/mptcp/subflow.c | 18 +++++++++++++----- 3 files changed, 13 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 381d5647a95b..b781498e69b4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -430,33 +430,6 @@ fallback: return ret; } -int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t len) -{ - struct mptcp_read_arg *arg = desc->arg.data; - size_t copy_len; - - copy_len = min(desc->count, len); - - if (likely(arg->msg)) { - int err; - - err = skb_copy_datagram_msg(skb, offset, arg->msg, copy_len); - if (err) { - pr_debug("error path"); - desc->error = err; - return err; - } - } else { - pr_debug("Flushing skb payload"); - } - - desc->count -= copy_len; - - pr_debug("consumed %zu bytes, %zu left", copy_len, desc->count); - return copy_len; -} - static void mptcp_wait_data(struct sock *sk, long *timeo) { DEFINE_WAIT_FUNC(wait, woken_wake_function); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6e6e162d25f1..d06170c5f191 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -191,13 +191,6 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif -struct mptcp_read_arg { - struct msghdr *msg; -}; - -int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t len); - void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3dad662840aa..37a4767db441 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -408,6 +408,18 @@ validate_seq: return MAPPING_OK; } +static int subflow_read_actor(read_descriptor_t *desc, + struct sk_buff *skb, + unsigned int offset, size_t len) +{ + size_t copy_len = min(desc->count, len); + + desc->count -= copy_len; + + pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count); + return copy_len; +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -482,16 +494,12 @@ static bool subflow_check_data_avail(struct sock *ssk) pr_debug("discarding %zu bytes, current map len=%d", delta, map_remaining); if (delta) { - struct mptcp_read_arg arg = { - .msg = NULL, - }; read_descriptor_t desc = { .count = delta, - .arg.data = &arg, }; int ret; - ret = tcp_read_sock(ssk, &desc, mptcp_read_actor); + ret = tcp_read_sock(ssk, &desc, subflow_read_actor); if (ret < 0) { ssk->sk_err = -ret; goto fatal; -- cgit v1.2.3 From 2e52213c79c0b94aff42ba898ad9ad57546be67d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Feb 2020 10:14:51 +0100 Subject: mptcp: avoid work queue scheduling if possible We can't lock_sock() the mptcp socket from the subflow data_ready callback, it would result in ABBA deadlock with the subflow socket lock. We can however grab the spinlock: if that succeeds and the mptcp socket is not owned at the moment, we can process the new skbs right away without deferring this to the work queue. This avoids the schedule_work and hence the small delay until the work item is processed. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 29 ++++++++++++++++++++++++++++- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 4 ++-- 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b781498e69b4..70f20c8eddbd 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -201,12 +201,39 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, return done; } -void mptcp_data_ready(struct sock *sk) +/* In most cases we will be able to lock the mptcp socket. If its already + * owned, we need to defer to the work queue to avoid ABBA deadlock. + */ +static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) +{ + struct sock *sk = (struct sock *)msk; + unsigned int moved = 0; + + if (READ_ONCE(sk->sk_lock.owned)) + return false; + + if (unlikely(!spin_trylock_bh(&sk->sk_lock.slock))) + return false; + + /* must re-check after taking the lock */ + if (!READ_ONCE(sk->sk_lock.owned)) + __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + + spin_unlock_bh(&sk->sk_lock.slock); + + return moved > 0; +} + +void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_sock *msk = mptcp_sk(sk); set_bit(MPTCP_DATA_READY, &msk->flags); + if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) && + move_skbs_to_msk(msk, ssk)) + goto wake; + /* don't schedule if mptcp sk is (still) over limit */ if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) goto wake; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d06170c5f191..6c0b2c8ab674 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -195,7 +195,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); void mptcp_finish_connect(struct sock *sk); -void mptcp_data_ready(struct sock *sk); +void mptcp_data_ready(struct sock *sk, struct sock *ssk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 37a4767db441..0de2a44bdaa0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -563,7 +563,7 @@ static void subflow_data_ready(struct sock *sk) } if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent); + mptcp_data_ready(parent, sk); } static void subflow_write_space(struct sock *sk) @@ -696,7 +696,7 @@ static void subflow_state_change(struct sock *sk) * the data available machinery here. */ if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent); + mptcp_data_ready(parent, sk); if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { -- cgit v1.2.3 From 14c441b564d560dea4c93947d5b40a992e13ca31 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 26 Feb 2020 10:14:52 +0100 Subject: mptcp: defer work schedule until mptcp lock is released Don't schedule the work queue right away, instead defer this to the lock release callback. This has the advantage that it will give recv path a chance to complete -- this might have moved all pending packets from the subflow to the mptcp receive queue, which allows to avoid the schedule_work(). Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 70f20c8eddbd..044295707bbf 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -238,9 +238,16 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) goto wake; - if (schedule_work(&msk->work)) - sock_hold((struct sock *)msk); + /* mptcp socket is owned, release_cb should retry */ + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, + &sk->sk_tsq_flags)) { + sock_hold(sk); + /* need to try again, its possible release_cb() has already + * been called after the test_and_set_bit() above. + */ + move_skbs_to_msk(msk, ssk); + } wake: sk->sk_data_ready(sk); } @@ -941,6 +948,32 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; } +#define MPTCP_DEFERRED_ALL TCPF_DELACK_TIMER_DEFERRED + +/* this is very alike tcp_release_cb() but we must handle differently a + * different set of events + */ +static void mptcp_release_cb(struct sock *sk) +{ + unsigned long flags, nflags; + + do { + flags = sk->sk_tsq_flags; + if (!(flags & MPTCP_DEFERRED_ALL)) + return; + nflags = flags & ~MPTCP_DEFERRED_ALL; + } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); + + if (flags & TCPF_DELACK_TIMER_DEFERRED) { + struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; + + ssk = mptcp_subflow_recv_lookup(msk); + if (!ssk || !schedule_work(&msk->work)) + __sock_put(sk); + } +} + static int mptcp_get_port(struct sock *sk, unsigned short snum) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1016,6 +1049,7 @@ static struct proto mptcp_prot = { .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, .recvmsg = mptcp_recvmsg, + .release_cb = mptcp_release_cb, .hash = inet_hash, .unhash = inet_unhash, .get_port = mptcp_get_port, -- cgit v1.2.3 From 9baeea50718fdd55c7ae4d61c15f2a71aef6e050 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Feb 2020 17:51:53 +0300 Subject: net: qrtr: Fix error pointer vs NULL bugs The callers only expect NULL pointers, so returning an error pointer will lead to an Oops. Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/qrtr/ns.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 7bfde01f4e8a..413228c4520e 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -76,7 +76,7 @@ static struct qrtr_node *node_get(unsigned int node_id) /* If node didn't exist, allocate and insert it to the tree */ node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) - return ERR_PTR(-ENOMEM); + return NULL; node->id = node_id; @@ -224,7 +224,7 @@ static struct qrtr_server *server_add(unsigned int service, srv = kzalloc(sizeof(*srv), GFP_KERNEL); if (!srv) - return ERR_PTR(-ENOMEM); + return NULL; srv->service = service; srv->instance = instance; -- cgit v1.2.3 From 07c6f9805f12f1bb538ef165a092b300350384aa Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 17:14:21 +0000 Subject: net: switchdev: do not propagate bridge updates across bridges When configuring a tree of independent bridges, propagating changes from the upper bridge across a bridge master to the lower bridge ports brings surprises. For example, a lower bridge may have vlan filtering enabled. 
It may have a vlan interface attached to the bridge master, which may then be incorporated into another bridge. As soon as the lower bridge vlan interface is attached to the upper bridge, the lower bridge has vlan filtering disabled. This occurs because switchdev recursively applies its changes to all lower devices no matter what. Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 60630762a748..f25604d68337 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -475,6 +475,9 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info, check_cb, add_cb); if (err && err != -EOPNOTSUPP) @@ -526,6 +529,9 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info, check_cb, del_cb); if (err && err != -EOPNOTSUPP) @@ -576,6 +582,9 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, * necessary to go through this helper. */ netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (netif_is_bridge_master(lower_dev)) + continue; + err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info, check_cb, set_cb); if (err && err != -EOPNOTSUPP) -- cgit v1.2.3 From 91a208f2185ad4855ff03c342d0b7e4f5fc6f5df Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 10:23:41 +0000 Subject: net: phylink: propagate resolved link config via mac_link_up() Propagate the resolved link parameters via the mac_link_up() call for MACs that do not automatically track their PCS state. We propagate the link parameters via function arguments so that inappropriate members of struct phylink_link_state can't be accessed, and creating a new structure just for this adds needless complexity to the API. Tested-by: Andre Przywara Tested-by: Alexandre Belloni Tested-by: Vladimir Oltean Signed-off-by: Russell King Signed-off-by: David S. Miller --- Documentation/networking/sfp-phylink.rst | 17 +++++-- drivers/net/ethernet/cadence/macb_main.c | 7 ++- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 7 ++- drivers/net/ethernet/marvell/mvneta.c | 8 ++-- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 19 +++++--- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 7 +-- drivers/net/phy/phylink.c | 9 +++- include/linux/phylink.h | 57 +++++++++++++++++------ net/dsa/port.c | 4 +- 11 files changed, 105 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index d753a309f9d1..8d7af28cd835 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -74,10 +74,13 @@ phylib to the sfp/phylink support. Please send patches to improve this documentation. 1. 
Optionally split the network driver's phylib update function into - three parts dealing with link-down, link-up and reconfiguring the - MAC settings. This can be done as a separate preparation commit. + two parts dealing with link-down and link-up. This can be done as + a separate preparation commit. - An example of this preparation can be found in git commit fc548b991fb0. + An older example of this preparation can be found in git commit + fc548b991fb0, although this was splitting into three parts; the + link-up part now includes configuring the MAC for the link settings. + Please see :c:func:`mac_link_up` for more information on this. 2. Replace:: @@ -207,6 +210,14 @@ this documentation. using. This is particularly important for in-band negotiation methods such as 1000base-X and SGMII. + The :c:func:`mac_link_up` method is used to inform the MAC that the + link has come up. The call includes the negotiation mode and interface + for reference only. The finalised link parameters are also supplied + (speed, duplex and flow control/pause enablement settings) which + should be used to configure the MAC when the MAC and PCS are not + tightly integrated, or when the settings are not coming from in-band + negotiation. + The :c:func:`mac_config` method is used to update the MAC with the requested state, and must avoid unnecessarily taking the link down when making changes to the MAC configuration. This means the diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2c28da1737fe..7ab0bef5e1bd 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -626,8 +626,11 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, netif_tx_stop_all_queues(ndev); } -static void macb_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, struct phy_device *phy) +static void macb_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct net_device *ndev = to_net_dev(config->dev); struct macb *bp = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 84233e467ed1..3a75c5b58f95 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -154,8 +154,11 @@ static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode, netdev_err(mac->net_dev, "dpmac_set_link_state() = %d\n", err); } -static void dpaa2_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, struct phy_device *phy) +static void dpaa2_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config); struct dpmac_link_state *dpmac_state = &mac->state; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 1c391f63a26f..9af3f8d5b289 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3965,9 +3965,11 @@ static void mvneta_mac_link_down(struct phylink_config *config, mvneta_set_eee(pp, false); } -static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy) +static 
void mvneta_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 72133cbe55d4..ed8042d97e29 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -58,8 +58,11 @@ static struct { */ static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); -static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, struct phy_device *phy); +static void mvpp2_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause); /* Queue modes */ #define MVPP2_QDIST_SINGLE_MODE 0 @@ -3473,8 +3476,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port) .interface = port->phy_interface, }; mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); - mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND, - port->phy_interface, NULL); + mvpp2_mac_link_up(&port->phylink_config, NULL, + MLO_AN_INBAND, port->phy_interface, + SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false); } netif_tx_start_all_queues(port->dev); @@ -5141,8 +5145,11 @@ static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode, mvpp2_port_enable(port); } -static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, struct phy_device *phy) +static void mvpp2_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct net_device *dev = to_net_dev(config->dev); struct mvpp2_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8c6cfd15481c..8d28f90acfe7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -412,9 +412,10 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } -static void mtk_mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy) +static void mtk_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause) { struct mtk_mac *mac = container_of(config, struct mtk_mac, phylink_config); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 37920b4da091..e039e715dcee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -950,8 +950,10 @@ static void stmmac_mac_link_down(struct phylink_config *config, } static void stmmac_mac_link_up(struct phylink_config *config, + struct phy_device *phy, unsigned int mode, phy_interface_t interface, - struct phy_device *phy) + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); diff --git 
a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 20746b801959..197740781157 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1486,9 +1486,10 @@ static void axienet_mac_link_down(struct phylink_config *config, } static void axienet_mac_link_up(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phy) + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) { /* nothing meaningful to do */ } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2899fbe699ab..b4367fab7899 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -480,8 +480,11 @@ static void phylink_mac_link_up(struct phylink *pl, struct net_device *ndev = pl->netdev; pl->cur_interface = link_state.interface; - pl->ops->mac_link_up(pl->config, pl->cur_link_an_mode, - pl->cur_interface, pl->phydev); + pl->ops->mac_link_up(pl->config, pl->phydev, + pl->cur_link_an_mode, pl->cur_interface, + link_state.speed, link_state.duplex, + !!(link_state.pause & MLO_PAUSE_TX), + !!(link_state.pause & MLO_PAUSE_RX)); if (ndev) netif_carrier_on(ndev); @@ -547,6 +550,8 @@ static void phylink_resolve(struct work_struct *w) link_state.pause = pl->phy_state.pause; phylink_apply_manual_flow(pl, &link_state); phylink_mac_config(pl, &link_state); + } else { + phylink_apply_manual_flow(pl, &link_state); } break; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 812357c03df4..2180eb1aa254 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -91,9 +91,10 @@ struct phylink_mac_ops { void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); - void (*mac_link_up)(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); + void (*mac_link_up)(struct phylink_config *config, + struct phy_device *phy, unsigned int mode, + phy_interface_t interface, int speed, int duplex, + bool tx_pause, bool rx_pause); }; #if 0 /* For kernel-doc purposes only. */ @@ -152,6 +153,9 @@ void mac_pcs_get_state(struct phylink_config *config, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * (this requires a rewrite - please refer to mac_link_up() for situations + * where the PCS and MAC are not tightly integrated.) + * * In all negotiation modes, as defined by @mode, @state->pause indicates the * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not * set, %MLO_PAUSE_TX and %MLO_PAUSE_RX indicate whether the MAC should send @@ -162,12 +166,20 @@ void mac_pcs_get_state(struct phylink_config *config, * The action performed depends on the currently selected mode: * * %MLO_AN_FIXED, %MLO_AN_PHY: - * Configure the specified @state->speed and @state->duplex over a link - * specified by @state->interface. @state->advertising may be used, but - * is not required. Pause modes as above. Other members of @state must - * be ignored. + * Configure for non-inband negotiation mode, where the link settings + * are completely communicated via mac_link_up(). The physical link + * protocol from the MAC is specified by @state->interface. + * + * @state->advertising may be used, but is not required. 
+ * + * Older drivers (prior to the mac_link_up() change) may use @state->speed, + * @state->duplex and @state->pause to configure the MAC, but this is + * deprecated; such drivers should be converted to use mac_link_up(). * - * Valid state members: interface, speed, duplex, pause, advertising. + * Other members of @state must be ignored. + * + * Valid state members: interface, advertising. + * Deprecated state members: speed, duplex, pause. * * %MLO_AN_INBAND: * place the link in an inband negotiation mode (such as 802.3z @@ -228,19 +240,34 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, /** * mac_link_up() - allow the link to come up * @config: a pointer to a &struct phylink_config. + * @phy: any attached phy * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode - * @phy: any attached phy + * @speed: link speed + * @duplex: link duplex + * @tx_pause: link transmit pause enablement status + * @rx_pause: link receive pause enablement status * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), allow the link to come up. If @phy - * is non-%NULL, configure Energy Efficient Ethernet by calling + * Configure the MAC for an established link. + * + * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link + * settings, and should be used to configure the MAC block appropriately + * where these settings are not automatically conveyed from the PCS block, + * or if in-band negotiation (as defined by phylink_autoneg_inband(@mode)) + * is disabled. + * + * Note that when 802.3z in-band negotiation is in use, it is possible + * that the user wishes to override the pause settings, and this should + * be allowed when considering the implementation of this method. + * + * If in-band negotiation mode is disabled, allow the link to come up. If + * @phy is non-%NULL, configure Energy Efficient Ethernet by calling * phy_init_eee() and perform appropriate MAC configuration for EEE. * Interface type selection must be done in mac_config(). */ -void mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); +void mac_link_up(struct phylink_config *config, struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause); #endif struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, diff --git a/net/dsa/port.c b/net/dsa/port.c index 774facb8d547..b2f5262b35cf 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -489,9 +489,11 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, } static void dsa_port_phylink_mac_link_up(struct phylink_config *config, + struct phy_device *phydev, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; -- cgit v1.2.3 From 5b502a7b2992008a1fd5962ba032771b03c4e840 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 10:23:46 +0000 Subject: net: dsa: propagate resolved link config via mac_link_up() Propagate the resolved link configuration down via DSA's phylink_mac_link_up() operation to allow split PCS/MAC to work. Tested-by: Vladimir Oltean Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- drivers/net/dsa/b53/b53_common.c | 4 +++- drivers/net/dsa/b53/b53_priv.h | 4 +++- drivers/net/dsa/bcm_sf2.c | 4 +++- drivers/net/dsa/lantiq_gswip.c | 4 +++- drivers/net/dsa/mt7530.c | 4 +++- drivers/net/dsa/mv88e6xxx/chip.c | 4 +++- drivers/net/dsa/ocelot/felix.c | 4 +++- drivers/net/dsa/qca/ar9331.c | 4 +++- drivers/net/dsa/sja1105/sja1105_main.c | 4 +++- include/net/dsa.h | 4 +++- net/dsa/port.c | 3 ++- 11 files changed, 32 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1a69286daa8d..ceafce446317 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1289,7 +1289,9 @@ EXPORT_SYMBOL(b53_phylink_mac_link_down); void b53_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct b53_device *dev = ds->priv; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 3c30f3a7eb29..3d42318bc3f1 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -338,7 +338,9 @@ void b53_phylink_mac_link_down(struct dsa_switch *ds, int port, void b53_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev); + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause); int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); int b53_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d1955543acd1..2daca9f0b8ca 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -649,7 +649,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->dev->ports[port].eee; diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 0369c22fe3e1..cf6fa8fede33 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1517,7 +1517,9 @@ static void gswip_phylink_mac_link_down(struct dsa_switch *ds, int port, static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct gswip_priv *priv = ds->priv; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 022466ca1c19..86818ab3bb07 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1482,7 +1482,9 @@ static void mt7530_phylink_mac_link_down(struct dsa_switch *ds, int port, static void mt7530_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct mt7530_priv *priv = ds->priv; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 705c118f6fdd..ebaaf21d7953 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ 
b/drivers/net/dsa/mv88e6xxx/chip.c @@ -655,7 +655,9 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { if (mode == MLO_AN_FIXED) mv88e6xxx_mac_link_force(ds, port, LINK_FORCED_UP); diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 35124ef7e75b..7e66821b05b4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -263,7 +263,9 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port, static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int link_an_mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct ocelot *ocelot = ds->priv; struct ocelot_port *ocelot_port = ocelot->ports[port]; diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index de25f99e995a..7c86056b9401 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -458,7 +458,9 @@ static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port, static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; struct regmap *regmap = priv->regmap; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 03ba6d25f7fe..c27cc7b37440 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -786,7 +786,9 @@ static void sja1105_mac_link_down(struct dsa_switch *ds, int port, static void sja1105_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev) + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) { sja1105_inhibit_tx(ds->priv, BIT(port), false); } diff --git a/include/net/dsa.h b/include/net/dsa.h index 63495e3443ac..7d3d84f0ef42 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -420,7 +420,9 @@ struct dsa_switch_ops { void (*phylink_mac_link_up)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface, - struct phy_device *phydev); + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause); void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* diff --git a/net/dsa/port.c b/net/dsa/port.c index b2f5262b35cf..d4450a454249 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -504,7 +504,8 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, return; } - ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev); + ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev, + speed, duplex, tx_pause, rx_pause); } const struct phylink_mac_ops dsa_port_phylink_mac_ops = { -- cgit v1.2.3 From 5682d393b40e1fe7426a7b8c3471f05262f42010 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:09 -0800 Subject: inet_diag: Refactor inet_sk_diag_fill(), dump(), and dump_one() In a latter patch, there is a need to update "cb->min_dump_alloc" in inet_sk_diag_fill() as it learns 
the diffierent bpf_sk_storages stored in a sk while dumping all sk(s) (e.g. tcp_hashinfo). The inet_sk_diag_fill() currently does not take the "cb" as an argument. One of the reason is inet_sk_diag_fill() is used by both dump_one() and dump() (which belong to the "struct inet_diag_handler". The dump_one() interface does not pass the "cb" along. This patch is to make dump_one() pass a "cb". The "cb" is created in inet_diag_cmd_exact(). The "nlh" and "in_skb" are stored in "cb" as the dump() interface does. The total number of args in inet_sk_diag_fill() is also cut from 10 to 7 and that helps many callers to pass fewer args. In particular, "struct user_namespace *user_ns", "u32 pid", and "u32 seq" can be replaced by accessing "cb->nlh" and "cb->skb". A similar argument reduction is also made to inet_twsk_diag_fill() and inet_req_diag_fill(). inet_csk_diag_dump() and inet_csk_diag_fill() are also removed. They are mostly equivalent to inet_sk_diag_fill(). Their repeated usages are very limited. Thus, inet_sk_diag_fill() is directly used in those occasions. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230409.1975173-1-kafai@fb.com --- include/linux/inet_diag.h | 12 ++--- net/dccp/diag.c | 5 +- net/ipv4/inet_diag.c | 116 ++++++++++++++++++---------------------------- net/ipv4/raw_diag.c | 18 +++---- net/ipv4/tcp_diag.c | 4 +- net/ipv4/udp_diag.c | 26 +++++------ net/sctp/diag.c | 5 +- 7 files changed, 73 insertions(+), 113 deletions(-) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..6b157ce07d74 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -18,8 +18,7 @@ struct inet_diag_handler { const struct inet_diag_req_v2 *r, struct nlattr *bc); - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, + int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); void (*idiag_get_info)(struct sock *sk, @@ -42,16 +41,15 @@ struct inet_diag_handler { struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin); + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 73ef73a218ff..8f1e2a653f6d 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -51,11 +51,10 @@ static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); } -static int dccp_diag_dump_one(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int dccp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); + return inet_diag_dump_one_icsk(&dccp_hashinfo, cb, req); } static const struct inet_diag_handler dccp_diag_handler = { diff --git 
a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f11e997e517b..d2ecff3195ba 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -157,11 +157,9 @@ errout: EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; @@ -174,8 +172,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(!handler); - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -187,7 +185,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, + sk_user_ns(NETLINK_CB(cb->skb).sk), + net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -312,30 +312,19 @@ errout: } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) -{ - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, - portid, seq, nlmsg_flags, unlh, net_admin); -} - static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + struct netlink_callback *cb, + u16 nlmsg_flags) { struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, + sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -359,16 +348,16 @@ static int inet_twsk_diag_fill(struct sock *sk, } static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) { struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -397,21 +386,18 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + u16 nlmsg_flags, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags); if 
(sk->sk_state == TCP_NEW_SYN_RECV) - return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, + net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -459,10 +445,10 @@ struct sock *inet_diag_find_one_icsk(struct net *net, EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { + struct sk_buff *in_skb = cb->skb; bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; @@ -479,10 +465,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, goto out; } - err = sk_diag_fill(sk, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, net_admin); + err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); @@ -509,14 +492,19 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, int err; handler = inet_diag_lock_handler(req->sdiag_protocol); - if (IS_ERR(handler)) + if (IS_ERR(handler)) { err = PTR_ERR(handler); - else if (cmd == SOCK_DIAG_BY_FAMILY) - err = handler->dump_one(in_skb, nlh, req); - else if (cmd == SOCK_DESTROY && handler->destroy) + } else if (cmd == SOCK_DIAG_BY_FAMILY) { + struct netlink_callback cb = { + .nlh = nlh, + .skb = in_skb, + }; + err = handler->dump_one(&cb, req); + } else if (cmd == SOCK_DESTROY && handler->destroy) { err = handler->destroy(in_skb, req); - else + } else { err = -EOPNOTSUPP; + } inet_diag_unlock_handler(handler); return err; @@ -847,23 +835,6 @@ static int inet_diag_bc_audit(const struct nlattr *attr, return len == 0 ? 0 : -EINVAL; } -static int inet_csk_diag_dump(struct sock *sk, - struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - const struct nlattr *bc, - bool net_admin) -{ - if (!inet_diag_bc_sk(bc, sk)) - return 0; - - return inet_csk_diag_fill(sk, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, - net_admin); -} - static void twsk_build_assert(void) { BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != @@ -935,8 +906,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, r->id.idiag_sport) goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, - bc, net_admin) < 0) { + if (!inet_diag_bc_sk(bc, sk)) + goto next_listen; + + if (inet_sk_diag_fill(sk, inet_csk(sk), skb, + cb, r, NLM_F_MULTI, + net_admin) < 0) { spin_unlock(&ilb->lock); goto done; } @@ -1014,11 +989,8 @@ next_normal: res = 0; for (idx = 0; idx < accum; idx++) { if (res >= 0) { - res = sk_diag_fill(sk_arr[idx], skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh, net_admin); + res = sk_diag_fill(sk_arr[idx], skb, cb, r, + NLM_F_MULTI, net_admin); if (res < 0) num = num_arr[idx]; } diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index e35736b99300..a2933eeabd91 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -87,15 +87,16 @@ out_unlock: return sk ? 
sk : ERR_PTR(-ENOENT); } -static int raw_diag_dump_one(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int raw_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - struct net *net = sock_net(in_skb->sk); + struct sk_buff *in_skb = cb->skb; struct sk_buff *rep; struct sock *sk; + struct net *net; int err; + net = sock_net(in_skb->sk); sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); @@ -108,10 +109,7 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, return -ENOMEM; } - err = inet_sk_diag_fill(sk, NULL, rep, r, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, + err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0, netlink_net_capable(in_skb, CAP_NET_ADMIN)); sock_put(sk); @@ -136,11 +134,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, r, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh, net_admin); + return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin); } static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 0d08f9e2d8d0..bcd3a26efff1 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -184,10 +184,10 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } -static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); + return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req); } #ifdef CONFIG_INET_DIAG_DESTROY diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 910555a4d9fe..7d65a6a5cd51 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -21,16 +21,15 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, req, - sk_user_ns(NETLINK_CB(cb->skb).sk), - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); + return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI, + net_admin); } -static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int udp_dump_one(struct udp_table *tbl, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { + struct sk_buff *in_skb = cb->skb; int err = -EINVAL; struct sock *sk = NULL; struct sk_buff *rep; @@ -70,11 +69,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, if (!rep) goto out; - err = inet_sk_diag_fill(sk, NULL, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, - netlink_net_capable(in_skb, CAP_NET_ADMIN)); + err = inet_sk_diag_fill(sk, NULL, rep, cb, req, 0, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); @@ -151,10 +147,10 @@ static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, udp_dump(&udp_table, skb, cb, r, bc); } -static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int udp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return udp_dump_one(&udp_table, in_skb, nlh, req); + return 
udp_dump_one(&udp_table, cb, req); } static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -255,10 +251,10 @@ static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, udp_dump(&udplite_table, skb, cb, r, bc); } -static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, +static int udplite_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return udp_dump_one(&udplite_table, in_skb, nlh, req); + return udp_dump_one(&udplite_table, cb, req); } static const struct inet_diag_handler udplite_diag_handler = { diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 8a15146faaeb..bed6436cd0af 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -432,11 +432,12 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo); } -static int sctp_diag_dump_one(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, +static int sctp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { + struct sk_buff *in_skb = cb->skb; struct net *net = sock_net(in_skb->sk); + const struct nlmsghdr *nlh = cb->nlh; union sctp_addr laddr, paddr; struct sctp_comm_param commp = { .skb = in_skb, -- cgit v1.2.3 From 0df6d32842b9a5f97a29ea90c8adc5cfac38341d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:15 -0800 Subject: inet_diag: Move the INET_DIAG_REQ_BYTECODE nlattr to cb->data The INET_DIAG_REQ_BYTECODE nlattr is currently re-found every time when the "dump()" is re-started. In a latter patch, it will also need to parse the new INET_DIAG_REQ_SK_BPF_STORAGES nlattr to learn the map_fds. Thus, this patch takes this chance to store the parsed nlattr in cb->data during the "start" time of a dump. By doing this, the "bc" argument also becomes unnecessary and is removed. Also, the two copies of the INET_DIAG_REQ_BYTECODE parsing-audit logic between compat/current version can be consolidated to one. 
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230415.1975555-1-kafai@fb.com --- include/linux/inet_diag.h | 11 ++-- include/uapi/linux/inet_diag.h | 3 +- net/dccp/diag.c | 4 +- net/ipv4/inet_diag.c | 117 ++++++++++++++++++++++++----------------- net/ipv4/raw_diag.c | 6 ++- net/ipv4/tcp_diag.c | 4 +- net/ipv4/udp_diag.c | 15 +++--- net/sctp/diag.c | 2 +- 8 files changed, 98 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 6b157ce07d74..1bb94cac265f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -15,8 +15,7 @@ struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); @@ -39,6 +38,11 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct inet_diag_dump_data { + struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; +#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +}; + struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, @@ -46,8 +50,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct netlink_callback *cb, const struct inet_diag_req_v2 *req); diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index a1ff345b3f33..bab9a9f8da12 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -64,9 +64,10 @@ struct inet_diag_req_raw { enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, + __INET_DIAG_REQ_MAX, }; -#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1) /* Bytecode is sequence of 4 byte commands followed by variable arguments. 
* All the commands identified by "code" are conditional jumps forward: diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 8f1e2a653f6d..8a82c5a2c5a8 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -46,9 +46,9 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); + inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r); } static int dccp_diag_dump_one(struct netlink_callback *cb, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index d2ecff3195ba..4bce8a477699 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -495,9 +495,11 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, if (IS_ERR(handler)) { err = PTR_ERR(handler); } else if (cmd == SOCK_DIAG_BY_FAMILY) { + struct inet_diag_dump_data empty_dump_data = {}; struct netlink_callback cb = { .nlh = nlh, .skb = in_skb, + .data = &empty_dump_data, }; err = handler->dump_one(&cb, req); } else if (cmd == SOCK_DESTROY && handler->destroy) { @@ -863,14 +865,17 @@ static void twsk_build_assert(void) void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct inet_diag_dump_data *cb_data = cb->data; struct net *net = sock_net(skb->sk); u32 idiag_states = r->idiag_states; int i, num, s_i, s_num; + struct nlattr *bc; struct sock *sk; + bc = cb_data->inet_diag_nla_bc; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; s_i = cb->args[1]; @@ -1014,15 +1019,14 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { const struct inet_diag_handler *handler; int err = 0; handler = inet_diag_lock_handler(r->sdiag_protocol); if (!IS_ERR(handler)) - handler->dump(skb, cb, r, bc); + handler->dump(skb, cb, r); else err = PTR_ERR(handler); inet_diag_unlock_handler(handler); @@ -1032,13 +1036,57 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int hdrlen = sizeof(struct inet_diag_req_v2); - struct nlattr *bc = NULL; + return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); +} + +static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) +{ + const struct nlmsghdr *nlh = cb->nlh; + struct inet_diag_dump_data *cb_data; + struct sk_buff *skb = cb->skb; + struct nlattr *nla; + int rem, err; + + cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); + if (!cb_data) + return -ENOMEM; + + nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), rem) { + int type = nla_type(nla); + + if (type < __INET_DIAG_REQ_MAX) + cb_data->req_nlas[type] = nla; + } + + nla = cb_data->inet_diag_nla_bc; + if (nla) { + err = inet_diag_bc_audit(nla, skb); + if (err) { + kfree(cb_data); + return err; + } + } + + cb->data = cb_data; + return 0; +} + +static int inet_diag_dump_start(struct netlink_callback *cb) +{ + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); +} + +static int inet_diag_dump_start_compat(struct netlink_callback *cb) +{ + return 
__inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); +} - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); +static int inet_diag_dump_done(struct netlink_callback *cb) +{ + kfree(cb->data); - return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); + return 0; } static int inet_diag_type2proto(int type) @@ -1057,9 +1105,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); - int hdrlen = sizeof(struct inet_diag_req); struct inet_diag_req_v2 req; - struct nlattr *bc = NULL; req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -1067,10 +1113,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - - return __inet_diag_dump(skb, cb, &req, bc); + return __inet_diag_dump(skb, cb, &req); } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, @@ -1098,22 +1141,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (nlh->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(nlh, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(nlh, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump_compat, - }; - return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start_compat, + .done = inet_diag_dump_done, + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } return inet_diag_get_exact_compat(skb, nlh); @@ -1129,22 +1162,12 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(h, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(h, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump, - }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start, + .done = inet_diag_dump_done, + .dump = inet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index a2933eeabd91..d19cce39be1b 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -138,17 +138,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; struct sock *sk = NULL; + struct nlattr *bc; if (IS_ERR(hashinfo)) return; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index bcd3a26efff1..75a1c985f49a 100644 --- 
a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -179,9 +179,9 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); + inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r); } static int tcp_diag_dump_one(struct netlink_callback *cb, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7d65a6a5cd51..93884696abdd 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -89,12 +89,16 @@ out_nosk: static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; + struct nlattr *bc; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -142,9 +146,9 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udp_table, skb, cb, r, bc); + udp_dump(&udp_table, skb, cb, r); } static int udp_diag_dump_one(struct netlink_callback *cb, @@ -245,10 +249,9 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udplite_table, skb, cb, r, bc); + udp_dump(&udplite_table, skb, cb, r); } static int udplite_diag_dump_one(struct netlink_callback *cb, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index bed6436cd0af..69743a6aaf6f 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -471,7 +471,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, } static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { u32 idiag_states = r->idiag_states; struct net *net = sock_net(skb->sk); -- cgit v1.2.3 From 1ed4d92458a969e71e7914550b6f0c730c14d84e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:21 -0800 Subject: bpf: INET_DIAG support in bpf_sk_storage This patch adds INET_DIAG support to bpf_sk_storage. 1. Although this series adds bpf_sk_storage diag capability to inet sk, bpf_sk_storage is in general applicable to all fullsock. Hence, the bpf_sk_storage logic will operate on SK_DIAG_* nlattr. The caller will pass in its specific nesting nlattr (e.g. INET_DIAG_*) as the argument. 2. The request will be like: INET_DIAG_REQ_SK_BPF_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) ...... Considering there could have multiple bpf_sk_storages in a sk, instead of reusing INET_DIAG_INFO ("ss -i"), the user can select some specific bpf_sk_storage to dump by specifying an array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD. If no SK_DIAG_BPF_STORAGE_REQ_MAP_FD is specified (i.e. an empty INET_DIAG_REQ_SK_BPF_STORAGES), it will dump all bpf_sk_storages of a sk. 3. 
The reply will be like: INET_DIAG_BPF_SK_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) ...... 4. Unlike other INET_DIAG info of a sk which is pretty static, the size required to dump the bpf_sk_storage(s) of a sk is dynamic as the system adding more bpf_sk_storage_map. It is hard to set a static min_dump_alloc size. Hence, this series learns it at the runtime and adjust the cb->min_dump_alloc as it iterates all sk(s) of a system. The "unsigned int *res_diag_size" in bpf_sk_storage_diag_put() is for this purpose. The next patch will update the cb->min_dump_alloc as it iterates the sk(s). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230421.1975729-1-kafai@fb.com --- include/linux/bpf.h | 1 + include/net/bpf_sk_storage.h | 27 ++++ include/uapi/linux/sock_diag.h | 26 ++++ kernel/bpf/syscall.c | 15 +++ net/core/bpf_sk_storage.c | 283 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 346 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9aa33b8f3d55..6015a4daf118 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1023,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 8e4f831d2e52..5036c94c0503 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -10,14 +10,41 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +struct bpf_sk_storage_diag; +struct sk_buff; +struct nlattr; +struct sock; + #ifdef CONFIG_BPF_SYSCALL int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk); +struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs); +void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag); +int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size); #else static inline int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) { return 0; } +static inline struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla) +{ + return NULL; +} +static inline void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) +{ +} +static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + return 0; +} #endif #endif /* _BPF_SK_STORAGE_H */ diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index e5925009a652..5f74a5f6091d 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -36,4 +36,30 @@ enum sknetlink_groups { }; #define SKNLGRP_MAX (__SKNLGRP_MAX - 1) +enum { + SK_DIAG_BPF_STORAGE_REQ_NONE, + SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + 
__SK_DIAG_BPF_STORAGE_REQ_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_REQ_MAX (__SK_DIAG_BPF_STORAGE_REQ_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_REP_NONE, + SK_DIAG_BPF_STORAGE, + __SK_DIAG_BPF_STORAGE_REP_MAX, +}; + +#define SK_DIAB_BPF_STORAGE_REP_MAX (__SK_DIAG_BPF_STORAGE_REP_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_NONE, + SK_DIAG_BPF_STORAGE_PAD, + SK_DIAG_BPF_STORAGE_MAP_ID, + SK_DIAG_BPF_STORAGE_MAP_VALUE, + __SK_DIAG_BPF_STORAGE_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_MAX (__SK_DIAG_BPF_STORAGE_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a79743a89815..c536c65256ad 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -902,6 +902,21 @@ void bpf_map_inc_with_uref(struct bpf_map *map) } EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); +struct bpf_map *bpf_map_get(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_map *map; + + map = __bpf_map_get(f); + if (IS_ERR(map)) + return map; + + bpf_map_inc(map); + fdput(f); + + return map; +} + struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 3ab23f698221..3415a4896c59 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -8,6 +8,7 @@ #include #include #include +#include #include static atomic_t cache_idx; @@ -606,6 +607,14 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) kfree(map); } +/* U16_MAX is much more than enough for sk local storage + * considering a tcp_sock is ~2k. + */ +#define MAX_VALUE_SIZE \ + min_t(u32, \ + (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \ + (U16_MAX - sizeof(struct bpf_sk_storage_elem))) + static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) { if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || @@ -619,12 +628,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (attr->value_size >= KMALLOC_MAX_SIZE - - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) || - /* U16_MAX is much more than enough for sk local storage - * considering a tcp_sock is ~2k. - */ - attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem)) + if (attr->value_size > MAX_VALUE_SIZE) return -E2BIG; return 0; @@ -910,3 +914,270 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_SOCKET, }; + +struct bpf_sk_storage_diag { + u32 nr_maps; + struct bpf_map *maps[]; +}; + +/* The reply will be like: + * INET_DIAG_BPF_SK_STORAGES (nla_nest) + * SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + * SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + * .... 
+ */ +static int nla_value_size(u32 value_size) +{ + /* SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + */ + return nla_total_size(0) + nla_total_size(sizeof(u32)) + + nla_total_size_64bit(value_size); +} + +void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) +{ + u32 i; + + if (!diag) + return; + + for (i = 0; i < diag->nr_maps; i++) + bpf_map_put(diag->maps[i]); + + kfree(diag); +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free); + +static bool diag_check_dup(const struct bpf_sk_storage_diag *diag, + const struct bpf_map *map) +{ + u32 i; + + for (i = 0; i < diag->nr_maps; i++) { + if (diag->maps[i] == map) + return true; + } + + return false; +} + +struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) +{ + struct bpf_sk_storage_diag *diag; + struct nlattr *nla; + u32 nr_maps = 0; + int rem, err; + + /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as + * the map_alloc_check() side also does. + */ + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + nla_for_each_nested(nla, nla_stgs, rem) { + if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + nr_maps++; + } + + diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps, + GFP_KERNEL); + if (!diag) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(nla, nla_stgs, rem) { + struct bpf_map *map; + int map_fd; + + if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + continue; + + map_fd = nla_get_u32(nla); + map = bpf_map_get(map_fd); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_free; + } + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) { + bpf_map_put(map); + err = -EINVAL; + goto err_free; + } + if (diag_check_dup(diag, map)) { + bpf_map_put(map); + err = -EEXIST; + goto err_free; + } + diag->maps[diag->nr_maps++] = map; + } + + return diag; + +err_free: + bpf_sk_storage_diag_free(diag); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); + +static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb) +{ + struct nlattr *nla_stg, *nla_value; + struct bpf_sk_storage_map *smap; + + /* It cannot exceed max nlattr's payload */ + BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE); + + nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); + if (!nla_stg) + return -EMSGSIZE; + + smap = rcu_dereference(sdata->smap); + if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) + goto errout; + + nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE, + smap->map.value_size, + SK_DIAG_BPF_STORAGE_PAD); + if (!nla_value) + goto errout; + + if (map_value_has_spin_lock(&smap->map)) + copy_map_value_locked(&smap->map, nla_data(nla_value), + sdata->data, true); + else + copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + + nla_nest_end(skb, nla_stg); + return 0; + +errout: + nla_nest_cancel(skb, nla_stg); + return -EMSGSIZE; +} + +static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + /* stg_array_type (e.g. 
INET_DIAG_BPF_SK_STORAGES) */ + unsigned int diag_size = nla_total_size(0); + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage_map *smap; + struct nlattr *nla_stgs; + unsigned int saved_len; + int err = 0; + + rcu_read_lock(); + + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + rcu_read_unlock(); + return 0; + } + + nla_stgs = nla_nest_start(skb, stg_array_type); + if (!nla_stgs) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + + saved_len = skb->len; + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + smap = rcu_dereference(SDATA(selem)->smap); + diag_size += nla_value_size(smap->map.value_size); + + if (nla_stgs && diag_get(SDATA(selem), skb)) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + } + + rcu_read_unlock(); + + if (nla_stgs) { + if (saved_len == skb->len) + nla_nest_cancel(skb, nla_stgs); + else + nla_nest_end(skb, nla_stgs); + } + + if (diag_size == nla_total_size(0)) { + *res_diag_size = 0; + return 0; + } + + *res_diag_size = diag_size; + return err; +} + +int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ + unsigned int diag_size = nla_total_size(0); + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_data *sdata; + struct nlattr *nla_stgs; + unsigned int saved_len; + int err = 0; + u32 i; + + *res_diag_size = 0; + + /* No map has been specified. Dump all. */ + if (!diag->nr_maps) + return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type, + res_diag_size); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + rcu_read_unlock(); + return 0; + } + + nla_stgs = nla_nest_start(skb, stg_array_type); + if (!nla_stgs) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + + saved_len = skb->len; + for (i = 0; i < diag->nr_maps; i++) { + sdata = __sk_storage_lookup(sk_storage, + (struct bpf_sk_storage_map *)diag->maps[i], + false); + + if (!sdata) + continue; + + diag_size += nla_value_size(diag->maps[i]->value_size); + + if (nla_stgs && diag_get(sdata, skb)) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + } + rcu_read_unlock(); + + if (nla_stgs) { + if (saved_len == skb->len) + nla_nest_cancel(skb, nla_stgs); + else + nla_nest_end(skb, nla_stgs); + } + + if (diag_size == nla_total_size(0)) { + *res_diag_size = 0; + return 0; + } + + *res_diag_size = diag_size; + return err; +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); -- cgit v1.2.3 From 085c20cacf2b72991ce1c9d99a5e2f1d9e73bb68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:27 -0800 Subject: bpf: inet_diag: Dump bpf_sk_storages in inet_diag_dump() This patch will dump out the bpf_sk_storages of a sk if the request has the INET_DIAG_REQ_SK_BPF_STORAGES nlattr. An array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD can be specified in INET_DIAG_REQ_SK_BPF_STORAGES to select which bpf_sk_storage to dump. If no map_fd is specified, all bpf_sk_storages of a sk will be dumped. bpf_sk_storages can be added to the system at runtime. It is difficult to find a proper static value for cb->min_dump_alloc. This patch learns the nlattr size required to dump the bpf_sk_storages of a sk. If it happens to be the very first nlmsg of a dump and it cannot fit the needed bpf_sk_storages, it will try to expand the skb by "pskb_expand_head()". 
Instead of expanding it in inet_sk_diag_fill(), it is expanded at a sleepable context in __inet_diag_dump() so __GFP_DIRECT_RECLAIM can be used. In __inet_diag_dump(), it will retry as long as the skb is empty and the cb->min_dump_alloc becomes larger than before. cb->min_dump_alloc is bounded by KMALLOC_MAX_SIZE. The min_dump_alloc is also changed from 'u16' to 'u32' to accommodate a sk that may have a few large bpf_sk_storages. The updated cb->min_dump_alloc will also be used to allocate the skb in the next dump. This logic already exists in netlink_dump(). Here is the sample output of a locally modified 'ss' and it could be made more readable by using BTF later: [root@arch-fb-vm1 ~]# ss --bpf-map-id 14 --bpf-map-id 13 -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] [root@arch-fb-vm1 ~]# ~/devshare/github/iproute2/misc/ss --bpf-maps -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:Port Process ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230427.1976129-1-kafai@fb.com --- include/linux/inet_diag.h | 4 +++ include/linux/netlink.h | 4 +-- include/uapi/linux/inet_diag.h | 2 ++ net/ipv4/inet_diag.c | 74 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 1bb94cac265f..e4ba25d63913 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -38,9 +38,13 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct bpf_sk_storage_diag; struct inet_diag_dump_data { struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; #define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] + + struct bpf_sk_storage_diag *bpf_stg_diag; }; struct inet_connection_sock; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..788969ccbbde 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,10 +188,10 @@ struct netlink_callback { struct module *module; struct netlink_ext_ack *extack; u16 family; - u16 min_dump_alloc; - bool strict_check; u16 answer_flags; + u32 min_dump_alloc; unsigned int prev_seq, seq; + bool strict_check; union { u8 ctx[48]; diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index bab9a9f8da12..75dffd78363a 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -64,6 +64,7 @@ struct inet_diag_req_raw { enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, + INET_DIAG_REQ_SK_BPF_STORAGES, __INET_DIAG_REQ_MAX, }; @@ -155,6 +156,7 @@ enum { INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, INET_DIAG_ULP_INFO, + INET_DIAG_SK_BPF_STORAGES, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 
4bce8a477699..e1cad25909df 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -156,6 +157,8 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); +#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, @@ -163,12 +166,14 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; + struct inet_diag_dump_data *cb_data; int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + cb_data = cb->data; handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(!handler); @@ -302,6 +307,48 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } + /* Keep it at the end for potential retry with a larger skb, + * or else do best-effort fitting, which is only done for the + * first_nlmsg. + */ + if (cb_data->bpf_stg_diag) { + bool first_nlmsg = ((unsigned char *)nlh == skb->data); + unsigned int prev_min_dump_alloc; + unsigned int total_nla_size = 0; + unsigned int msg_len; + int err; + + msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, + INET_DIAG_SK_BPF_STORAGES, + &total_nla_size); + + if (!err) + goto out; + + total_nla_size += msg_len; + prev_min_dump_alloc = cb->min_dump_alloc; + if (total_nla_size > prev_min_dump_alloc) + cb->min_dump_alloc = min_t(u32, total_nla_size, + MAX_DUMP_ALLOC_SIZE); + + if (!first_nlmsg) + goto errout; + + if (cb->min_dump_alloc > prev_min_dump_alloc) + /* Retry with pskb_expand_head() with + * __GFP_DIRECT_RECLAIM + */ + goto errout; + + WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); + + /* Send what we have for this sk + * and move on to the next sk in the following + * dump() + */ + } + out: nlmsg_end(skb, nlh); return 0; @@ -1022,8 +1069,11 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { const struct inet_diag_handler *handler; + u32 prev_min_dump_alloc; int err = 0; +again: + prev_min_dump_alloc = cb->min_dump_alloc; handler = inet_diag_lock_handler(r->sdiag_protocol); if (!IS_ERR(handler)) handler->dump(skb, cb, r); @@ -1031,6 +1081,15 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, err = PTR_ERR(handler); inet_diag_unlock_handler(handler); + /* The skb is not large enough to fit one sk info and + * inet_sk_diag_fill() has requested for a larger skb. + */ + if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { + err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); + if (!err) + goto again; + } + return err ? 
: skb->len; } @@ -1068,6 +1127,18 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) } } + nla = cb_data->inet_diag_nla_bpf_stgs; + if (nla) { + struct bpf_sk_storage_diag *bpf_stg_diag; + + bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); + if (IS_ERR(bpf_stg_diag)) { + kfree(cb_data); + return PTR_ERR(bpf_stg_diag); + } + cb_data->bpf_stg_diag = bpf_stg_diag; + } + cb->data = cb_data; return 0; } @@ -1084,6 +1155,9 @@ static int inet_diag_dump_start_compat(struct netlink_callback *cb) static int inet_diag_dump_done(struct netlink_callback *cb) { + struct inet_diag_dump_data *cb_data = cb->data; + + bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); kfree(cb->data); return 0; -- cgit v1.2.3 From a7e454542bf8d57c75f59e7e7326c21db3d0bb3f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 17:02:27 -0600 Subject: Bluetooth: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.h | 6 +++--- drivers/bluetooth/btrtl.h | 4 ++-- include/net/bluetooth/hci.h | 30 +++++++++++++++--------------- include/net/bluetooth/hci_sock.h | 6 +++--- include/net/bluetooth/l2cap.h | 8 ++++---- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/a2mp.h | 10 +++++----- net/bluetooth/bnep/bnep.h | 6 +++--- 8 files changed, 36 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index f5795b1a3779..e16a4d650597 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -79,7 +79,7 @@ struct qca_fw_config { struct edl_event_hdr { __u8 cresp; __u8 rtype; - __u8 data[0]; + __u8 data[]; } __packed; struct qca_btsoc_version { @@ -112,12 +112,12 @@ struct tlv_type_nvm { __le16 tag_len; __le32 reserve1; __le32 reserve2; - __u8 data[0]; + __u8 data[]; } __packed; struct tlv_type_hdr { __le32 type_len; - __u8 data[0]; + __u8 data[]; } __packed; enum qca_btsoc_type { diff --git a/drivers/bluetooth/btrtl.h b/drivers/bluetooth/btrtl.h index 10ad40c3e42c..2a582682136d 100644 --- a/drivers/bluetooth/btrtl.h +++ b/drivers/bluetooth/btrtl.h @@ -38,13 +38,13 @@ struct rtl_epatch_header { struct rtl_vendor_config_entry { __le16 offset; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; struct rtl_vendor_config { __le32 signature; __le16 total_len; - struct rtl_vendor_config_entry entry[0]; + struct rtl_vendor_config_entry entry[]; } __packed; #if IS_ENABLED(CONFIG_BT_RTL) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6293bdd7d862..d878bf8dce20 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -935,7 +935,7 @@ struct hci_cp_sniff_subrate { struct hci_cp_set_event_flt { __u8 flt_type; __u8 cond_type; - __u8 condition[0]; + __u8 condition[]; } __packed; /* Filter types */ @@ -1335,7 +1335,7 @@ struct hci_rp_read_local_amp_assoc { __u8 status; __u8 phy_handle; __le16 rem_len; - __u8 frag[0]; + __u8 frag[]; } __packed; #define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b @@ -1343,7 +1343,7 @@ struct hci_cp_write_remote_amp_assoc { __u8 phy_handle; __le16 len_so_far; __le16 rem_len; - __u8 frag[0]; + __u8 frag[]; } __packed; struct hci_rp_write_remote_amp_assoc { __u8 status; @@ -1613,7 +1613,7 @@ struct hci_cp_le_set_ext_scan_params { __u8 own_addr_type; __u8 filter_policy; __u8 scanning_phys; - __u8 data[0]; + __u8 data[]; } __packed; #define LE_SCAN_PHY_1M 0x01 @@ -1641,7 +1641,7 @@ struct hci_cp_le_ext_create_conn { __u8 peer_addr_type; bdaddr_t peer_addr; __u8 phys; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_le_ext_conn_param { @@ -1693,7 +1693,7 @@ struct hci_rp_le_set_ext_adv_params { struct hci_cp_le_set_ext_adv_enable { __u8 enable; __u8 num_of_sets; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_ext_adv_set { @@ -1775,14 +1775,14 @@ struct hci_cp_le_set_cig_params { __le16 m_latency; __le16 s_latency; __u8 num_cis; - struct hci_cis_params cis[0]; + struct hci_cis_params cis[]; } __packed; struct hci_rp_le_set_cig_params { __u8 status; __u8 cig_id; __u8 num_handles; - __le16 handle[0]; + __le16 handle[]; } __packed; #define HCI_OP_LE_CREATE_CIS 0x2064 @@ -1793,7 +1793,7 @@ struct hci_cis { struct hci_cp_le_create_cis { __u8 num_cis; - struct hci_cis cis[0]; + struct hci_cis cis[]; } __packed; #define HCI_OP_LE_REMOVE_CIG 0x2065 @@ -1937,7 +1937,7 @@ struct hci_comp_pkts_info { struct hci_ev_num_comp_pkts { __u8 num_hndl; - struct hci_comp_pkts_info handles[0]; + struct 
hci_comp_pkts_info handles[]; } __packed; #define HCI_EV_MODE_CHANGE 0x14 @@ -2170,7 +2170,7 @@ struct hci_comp_blocks_info { struct hci_ev_num_comp_blocks { __le16 num_blocks; __u8 num_hndl; - struct hci_comp_blocks_info handles[0]; + struct hci_comp_blocks_info handles[]; } __packed; #define HCI_EV_SYNC_TRAIN_COMPLETE 0x4F @@ -2226,7 +2226,7 @@ struct hci_ev_le_advertising_info { __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 @@ -2302,7 +2302,7 @@ struct hci_ev_le_ext_adv_report { __u8 direct_addr_type; bdaddr_t direct_addr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a @@ -2362,7 +2362,7 @@ struct hci_evt_le_cis_req { #define HCI_EV_STACK_INTERNAL 0xfd struct hci_ev_stack_internal { __u16 type; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_SI_DEVICE 0x01 @@ -2409,7 +2409,7 @@ struct hci_sco_hdr { struct hci_iso_hdr { __le16 handle; __le16 dlen; - __u8 data[0]; + __u8 data[]; } __packed; /* ISO data packet status flags */ diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 8e9138acdae1..9352bb1bf34c 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,19 +144,19 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[0]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; struct hci_conn_list_req { __u16 dev_id; __u16 conn_num; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_conn_info_req { bdaddr_t bdaddr; __u8 type; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_auth_info_req { diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 093aedebdf0c..61dc731d5666 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -299,14 +299,14 @@ struct l2cap_conn_rsp { struct l2cap_conf_req { __le16 dcid; __le16 flags; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_conf_rsp { __le16 scid; __le16 flags; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; #define L2CAP_CONF_SUCCESS 0x0000 @@ -322,7 +322,7 @@ struct l2cap_conf_rsp { struct l2cap_conf_opt { __u8 type; __u8 len; - __u8 val[0]; + __u8 val[]; } __packed; #define L2CAP_CONF_OPT_SIZE 2 @@ -392,7 +392,7 @@ struct l2cap_info_req { struct l2cap_info_rsp { __le16 type; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_create_chan_req { diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 8d65d2a0b9b4..99d26879b02a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[0]; + struct rfcomm_dev_info dev_info[]; }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h index 0029d5119be6..2fd253a61a2a 100644 --- a/net/bluetooth/a2mp.h +++ b/net/bluetooth/a2mp.h @@ -36,14 +36,14 @@ struct a2mp_cmd { __u8 code; __u8 ident; __le16 len; - __u8 data[0]; + __u8 data[]; } __packed; /* A2MP command codes */ #define A2MP_COMMAND_REJ 0x01 struct a2mp_cmd_rej { __le16 reason; - __u8 data[0]; + __u8 data[]; } __packed; #define A2MP_DISCOVER_REQ 0x02 @@ -62,7 +62,7 @@ struct a2mp_cl { struct a2mp_discov_rsp { __le16 mtu; __le16 ext_feat; - 
struct a2mp_cl cl[0]; + struct a2mp_cl cl[]; } __packed; #define A2MP_CHANGE_NOTIFY 0x04 @@ -93,7 +93,7 @@ struct a2mp_amp_assoc_req { struct a2mp_amp_assoc_rsp { __u8 id; __u8 status; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_REQ 0x0A @@ -101,7 +101,7 @@ struct a2mp_amp_assoc_rsp { struct a2mp_physlink_req { __u8 local_id; __u8 remote_id; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_RSP 0x0B diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 24f18b133959..9680473ed7ef 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -74,14 +74,14 @@ struct bnep_setup_conn_req { __u8 type; __u8 ctrl; __u8 uuid_size; - __u8 service[0]; + __u8 service[]; } __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; - __u8 list[0]; + __u8 list[]; } __packed; struct bnep_control_rsp { @@ -93,7 +93,7 @@ struct bnep_control_rsp { struct bnep_ext_hdr { __u8 type; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; /* BNEP ioctl defines */ -- cgit v1.2.3 From d7d41682efc25d58b5bd8b80e85e3c9ce586635c Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Tue, 25 Feb 2020 18:38:09 +0530 Subject: Bluetooth: Fix Suspicious RCU usage warnings The following functions in hci_core are always called with hdev->lock held. No need to use list_for_each_entry_rcu(), therefore change the usage of list_for_each_entry_rcu() in these functions to list_for_each_entry(). hci_link_keys_clear() hci_smp_ltks_clear() hci_smp_irks_clear() hci_blocked_keys_clear() Warning encountered with CONFIG_PROVE_RCU_LIST: [ 72.213184] ============================= [ 72.213188] WARNING: suspicious RCU usage [ 72.213192] 5.6.0-rc1+ #5 Not tainted [ 72.213195] ----------------------------- [ 72.213198] net/bluetooth/hci_core.c:2288 RCU-list traversed in non-reader section!! [ 72.213676] ============================= [ 72.213679] WARNING: suspicious RCU usage [ 72.213683] 5.6.0-rc1+ #5 Not tainted [ 72.213685] ----------------------------- [ 72.213689] net/bluetooth/hci_core.c:2298 RCU-list traversed in non-reader section!! [ 72.214195] ============================= [ 72.214198] WARNING: suspicious RCU usage [ 72.214201] 5.6.0-rc1+ #5 Not tainted [ 72.214204] ----------------------------- [ 72.214208] net/bluetooth/hci_core.c:2308 RCU-list traversed in non-reader section!! [ 333.456972] ============================= [ 333.456979] WARNING: suspicious RCU usage [ 333.457001] 5.6.0-rc1+ #5 Not tainted [ 333.457007] ----------------------------- [ 333.457014] net/bluetooth/hci_core.c:2318 RCU-list traversed in non-reader section!! 
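For readers less familiar with the rule being applied, a generic sketch (a hypothetical 'items' list and lock, not Bluetooth code): list_for_each_entry_rcu() is meant for lock-free readers inside an RCU read-side critical section, while code that already holds the lock serializing list updates - as these hci_*_clear() helpers do with the hdev lock - can use the plain iterator, which is what silences the CONFIG_PROVE_RCU_LIST warnings quoted above.

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>

/* Hypothetical list, for illustration only. */
struct item {
	struct list_head node;
};
static LIST_HEAD(items);
static DEFINE_MUTEX(items_lock);	/* serializes all updates to 'items' */

static void locked_traversal(void)	/* e.g. an update-side helper */
{
	struct item *it;

	mutex_lock(&items_lock);
	list_for_each_entry(it, &items, node)	/* plain variant is correct here */
		/* read or modify *it under the lock */;
	mutex_unlock(&items_lock);
}

static void lockless_traversal(void)	/* e.g. a fast-path reader */
{
	struct item *it;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &items, node)	/* RCU variant required */
		/* read *it without taking the lock */;
	rcu_read_unlock();
}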
Signed-off-by: Madhuparna Bhowmik Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cbbc34a006d1..8ddd1bea02be 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2285,7 +2285,7 @@ void hci_link_keys_clear(struct hci_dev *hdev) { struct link_key *key; - list_for_each_entry_rcu(key, &hdev->link_keys, list) { + list_for_each_entry(key, &hdev->link_keys, list) { list_del_rcu(&key->list); kfree_rcu(key, rcu); } @@ -2295,7 +2295,7 @@ void hci_smp_ltks_clear(struct hci_dev *hdev) { struct smp_ltk *k; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry(k, &hdev->long_term_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2305,7 +2305,7 @@ void hci_smp_irks_clear(struct hci_dev *hdev) { struct smp_irk *k; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry(k, &hdev->identity_resolving_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2315,7 +2315,7 @@ void hci_blocked_keys_clear(struct hci_dev *hdev) { struct blocked_key *b; - list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { + list_for_each_entry(b, &hdev->blocked_keys, list) { list_del_rcu(&b->list); kfree_rcu(b, rcu); } -- cgit v1.2.3 From 0c2ac7d4f08d330dc5b092b4beba9ef88602d369 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Tue, 25 Feb 2020 18:47:53 +0530 Subject: Bluetooth: Use list_for_each_entry_rcu() to traverse RCU list in RCU read-side CS In function hci_is_blocked_key() RCU list is traversed with list_for_each_entry() in RCU read-side CS. Use list_for_each_entry_rcu() instead. Signed-off-by: Madhuparna Bhowmik Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8ddd1bea02be..4e6d61a95b20 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2327,7 +2327,7 @@ bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]) struct blocked_key *b; rcu_read_lock(); - list_for_each_entry(b, &hdev->blocked_keys, list) { + list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { if (b->type == type && !memcmp(b->val, val, sizeof(b->val))) { blocked = true; break; -- cgit v1.2.3 From a9e45698b37d4235ec98b5c0327de59759cb2ef2 Mon Sep 17 00:00:00 2001 From: Sathish Narsimman Date: Mon, 24 Feb 2020 11:02:24 +0530 Subject: Bluetooth: Remove adv set for directed advertising Extended advertising Data is set during bluetooth initialization by default which causes InvalidHCICommandParameters when setting Extended advertising parameters. As per Core Spec 5.2 Vol 2, PART E, Sec 7.8.53, for advertising_event_property LE_LEGACY_ADV_DIRECT_IND does not supports advertising data when the advertising set already contains some, the controller shall return erroc code 'InvalidHCICommandParameters(0x12). So it is required to remove adv set for handle 0x00. since we use instance 0 for directed adv. 
Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_conn.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d878bf8dce20..29b638c6c934 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1724,6 +1724,8 @@ struct hci_cp_le_set_ext_scan_rsp_data { #define LE_SET_ADV_DATA_NO_FRAG 0x01 +#define HCI_OP_LE_REMOVE_ADV_SET 0x203c + #define HCI_OP_LE_CLEAR_ADV_SETS 0x203d #define HCI_OP_LE_SET_ADV_SET_RAND_ADDR 0x2035 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a582c676e584..2731f0ad2a90 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -898,6 +898,16 @@ static void hci_req_directed_advertising(struct hci_request *req, cp.peer_addr_type = conn->dst_type; bacpy(&cp.peer_addr, &conn->dst); + /* As per Core Spec 5.2 Vol 2, PART E, Sec 7.8.53, for + * advertising_event_property LE_LEGACY_ADV_DIRECT_IND + * does not supports advertising data when the advertising set already + * contains some, the controller shall return erroc code 'Invalid + * HCI Command Parameters(0x12). + * So it is required to remove adv set for handle 0x00. since we use + * instance 0 for directed adv. + */ + hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(cp.handle), &cp.handle); + hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); if (own_addr_type == ADDR_LE_DEV_RANDOM && -- cgit v1.2.3 From c3bed4de5d0671426d047d9b58b140d6a9114c83 Mon Sep 17 00:00:00 2001 From: Sathish Narsimman Date: Mon, 24 Feb 2020 10:53:40 +0530 Subject: Bluetooth: During le_conn_timeout disable EXT_ADV Disabling LE_LEGACY_ADV when LE_EXT_ADV is enabled causes 'command disallowed . This patch fixes that issue and disables EXT_ADV if enabled. Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2731f0ad2a90..e245bc155cc2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -467,6 +467,23 @@ static void hci_conn_auto_accept(struct work_struct *work) &conn->dst); } +static void le_disable_advertising(struct hci_dev *hdev) +{ + if (ext_adv_capable(hdev)) { + struct hci_cp_le_set_ext_adv_enable cp; + + cp.enable = 0x00; + cp.num_of_sets = 0x00; + + hci_send_cmd(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), + &cp); + } else { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + } +} + static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, @@ -481,9 +498,8 @@ static void le_conn_timeout(struct work_struct *work) * (which doesn't have a timeout of its own). */ if (conn->role == HCI_ROLE_SLAVE) { - u8 enable = 0x00; - hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); + /* Disable LE Advertising */ + le_disable_advertising(hdev); hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); return; } -- cgit v1.2.3 From 4b127bd5f2cc1b2da041f472dab6dc729cdd4711 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 27 Feb 2020 18:29:39 +0000 Subject: Bluetooth: Support querying for WBS support through MGMT This patch provides a mechanism for MGMT interface client to query the capability of the controller to support WBS. 
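As a sketch of how the pieces fit together (the example_bt_setup() helper and controller_has_wbs flag are assumptions for illustration; in this patch only btusb sets the quirk, keyed off BTUSB_WIDEBAND_SPEECH in its device table): a transport driver that knows its controller supports wide band speech sets the new quirk before hci_register_dev(), and get_supported_settings() then advertises MGMT_SETTING_WIDE_BAND_SPEECH to MGMT clients.

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

/* Hypothetical driver snippet (assumed names, for illustration only). */
static int example_bt_setup(struct hci_dev *hdev, bool controller_has_wbs)
{
	if (controller_has_wbs)
		set_bit(HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, &hdev->quirks);

	/* the quirk must be set before registration so MGMT picks it up */
	return hci_register_dev(hdev);
}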
Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 3 +++ include/net/bluetooth/hci.h | 9 +++++++++ include/net/bluetooth/mgmt.h | 3 ++- net/bluetooth/mgmt.c | 4 ++++ 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b34a71716fe1..48e78fdc8e83 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3867,6 +3867,9 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; + if (id->driver_info & BTUSB_WIDEBAND_SPEECH) + set_bit(HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, &hdev->quirks); + if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 27b6363dd9c6..0b3ebd35681d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -205,6 +205,15 @@ enum { * */ HCI_QUIRK_NON_PERSISTENT_SETUP, + + /* When this quirk is set, wide band speech is supported by + * the driver since no reliable mechanism exist to report + * this from the hardware, a driver flag is use to convey + * this support + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, }; /* HCI device flags */ diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index a90666af05bd..f69f88e8e109 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -101,7 +101,8 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_PRIVACY 0x00002000 #define MGMT_SETTING_CONFIGURATION 0x00004000 #define MGMT_SETTING_STATIC_ADDRESS 0x00008000 -#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_WIDE_BAND_SPEECH 0x00020000 #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3074363c68df..1002c657768a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -762,6 +762,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; + + if (test_bit(HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, + &hdev->quirks)) + settings |= MGMT_SETTING_WIDE_BAND_SPEECH; } if (lmp_le_capable(hdev)) { -- cgit v1.2.3 From 95e486f5519848ca4c2f2645cbe120de5df133f3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:19:07 -0600 Subject: xdp: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. 
As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Acked-by: Jonathan Lemon Acked-by: Björn Töpel Signed-off-by: David S. Miller --- net/xdp/xsk_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 89a01ac4e079..b50bb5c76da5 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -19,13 +19,13 @@ struct xdp_ring { /* Used for the RX and TX queues for packets */ struct xdp_rxtx_ring { struct xdp_ring ptrs; - struct xdp_desc desc[0] ____cacheline_aligned_in_smp; + struct xdp_desc desc[] ____cacheline_aligned_in_smp; }; /* Used for the fill and completion queues for buffers */ struct xdp_umem_ring { struct xdp_ring ptrs; - u64 desc[0] ____cacheline_aligned_in_smp; + u64 desc[] ____cacheline_aligned_in_smp; }; struct xsk_queue { -- cgit v1.2.3 From 680a93166e80e43e3ff85be06005c5cfa492d852 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:26:15 -0600 Subject: net: mpls: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- include/net/mpls_iptunnel.h | 2 +- net/mpls/internal.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 6b4759eae158..9deb3a3735da 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -11,7 +11,7 @@ struct mpls_iptunnel_encap { u8 ttl_propagate; u8 default_ttl; u8 reserved1; - u32 label[0]; + u32 label[]; }; static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 768a302879b4..0e9aa94adc07 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -98,7 +98,7 @@ struct mpls_nh { /* next hop label forwarding entry */ u8 nh_via_table; u8 nh_reserved1; - u32 nh_label[0]; + u32 nh_label[]; }; /* offset of via from beginning of mpls_nh */ @@ -154,7 +154,7 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_nh_size; u8 rt_via_offset; u8 rt_reserved1; - struct mpls_nh rt_nh[0]; + struct mpls_nh rt_nh[]; }; #define for_nexthops(rt) { \ -- cgit v1.2.3 From af71b090c88c12816d43514190790de919921cea Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:30:45 -0600 Subject: l2tp: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] Lastly, fix the following checkpatch warning: CHECK: Prefer kernel type 'u8' over 'uint8_t' #50: FILE: net/l2tp/l2tp_core.h:119: + uint8_t priv[]; /* private data */ This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2db3d50d10a4..10cf7c3dcbb3 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -116,7 +116,7 @@ struct l2tp_session { void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); void (*show)(struct seq_file *m, void *priv); - uint8_t priv[0]; /* private data */ + u8 priv[]; /* private data */ }; /* Describes the tunnel. It contains info to track all the associated -- cgit v1.2.3 From 8402a31dd803e091fd2ec9cd22040b34a0b07085 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Fri, 28 Feb 2020 07:33:37 -0600 Subject: net: dccp: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- net/dccp/ccid.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6b64b6cc2175..07e547c02fd8 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -198,7 +198,7 @@ enum dccp_role { struct dccp_service_list { __u32 dccpsl_nr; - __be32 dccpsl_list[0]; + __be32 dccpsl_list[]; }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 70f88f2b4456..105f3734dadb 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -95,7 +95,7 @@ void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; - char ccid_priv[0]; + char ccid_priv[]; }; static inline void *ccid_priv(const struct ccid *ccid) -- cgit v1.2.3 From b0c9a2d9a8ee13ec68ff5f80fdd74bd83272f967 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:36:41 -0600 Subject: ipv6: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 2 +- net/ipv6/seg6_iptunnel.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 95835e8d99aa..871d6e52ec67 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -36,7 +36,7 @@ struct tmp_ext { struct in6_addr saddr; #endif struct in6_addr daddr; - char hdrs[0]; + char hdrs[]; }; struct ah_skb_cb { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ab7f124ff5d7..d8afe7290de8 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -29,7 +29,7 @@ struct seg6_lwt { struct dst_cache cache; - struct seg6_iptunnel_encap tuninfo[0]; + struct seg6_iptunnel_encap tuninfo[]; }; static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) -- cgit v1.2.3 From d2afb41ae60435cbe5d9d1078a7d90de04e571b8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:43:24 -0600 Subject: net: core: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/core/bpf_sk_storage.c | 2 +- net/core/devlink.c | 2 +- net/core/drop_monitor.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 3ab23f698221..427cfbc0d50d 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -60,7 +60,7 @@ struct bpf_sk_storage_data { * the number of cachelines access during the cache hit case. 
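
A short aside on the "sizeof evaluates to zero" quirk that these flexible-array conversions keep quoting: it is easy to demonstrate in ordinary C. The sketch below is made-up userspace code, not kernel code, and the struct names are invented.

	#include <stdio.h>
	#include <stdlib.h>

	struct old_style {              /* GNU zero-length array extension */
		int nr;
		int items[0];
	};

	struct new_style {              /* C99 flexible array member */
		int nr;
		int items[];
	};

	int main(void)
	{
		/* The trailing array contributes nothing to either size, so both
		 * structs are sizeof(int) bytes.  sizeof on old_style.items is 0,
		 * while sizeof applied to the flexible member does not compile at
		 * all ("invalid application of 'sizeof' to incomplete type"). */
		printf("%zu %zu\n", sizeof(struct old_style), sizeof(struct new_style));

		/* Allocation is unchanged: header plus n trailing elements, which
		 * is what the kernel's struct_size() helper computes. */
		struct new_style *p = malloc(sizeof(*p) + 4 * sizeof(p->items[0]));
		if (!p)
			return 1;
		p->nr = 4;
		free(p);
		return 0;
	}

The only behavioural difference the conversion brings is the compile-time error when a flexible member is not the last field of the struct, which is exactly the class of bug these patches are guarding against.
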
*/ struct bpf_sk_storage_map __rcu *smap; - u8 data[0] __aligned(8); + u8 data[] __aligned(8); }; /* Linked to bpf_sk_storage and bpf_sk_storage_map */ diff --git a/net/core/devlink.c b/net/core/devlink.c index f8af5e2d748b..295d761cbfb1 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4232,7 +4232,7 @@ struct devlink_fmsg_item { int attrtype; u8 nla_type; u16 len; - int value[0]; + int value[]; }; struct devlink_fmsg { diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d58c1c45a895..8e33cec9fc4e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -68,7 +68,7 @@ struct net_dm_hw_entry { struct net_dm_hw_entries { u32 num_entries; - struct net_dm_hw_entry entries[0]; + struct net_dm_hw_entry entries[]; }; struct per_cpu_dm_data { -- cgit v1.2.3 From 7782040b950b5d0433f734fb2bba8b8b5ed6ce5a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 28 Feb 2020 14:45:21 +0100 Subject: unix: uses an atomic type for scm files accounting So the scm_stat_{add,del} helper can be invoked with no additional lock held. This clean-up the code a bit and will make the next patch easier. Signed-off-by: Paolo Abeni Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 21 ++++++--------------- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index e51d727cc3cd..f42fdddecd41 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -42,7 +42,7 @@ struct unix_skb_parms { } __randomize_layout; struct scm_stat { - u32 nr_fds; + atomic_t nr_fds; }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9d0518d9bdd4..c46fa271fc4a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -690,7 +690,8 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) if (sk) { u = unix_sk(sock->sk); - seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds)); + seq_printf(m, "scm_fds: %u\n", + atomic_read(&u->scm_stat.nr_fds)); } } #else @@ -1602,10 +1603,8 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - lockdep_assert_held(&sk->sk_receive_queue.lock); - if (unlikely(fp && fp->count)) - u->scm_stat.nr_fds += fp->count; + atomic_add(fp->count, &u->scm_stat.nr_fds); } static void scm_stat_del(struct sock *sk, struct sk_buff *skb) @@ -1613,10 +1612,8 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - lockdep_assert_held(&sk->sk_receive_queue.lock); - if (unlikely(fp && fp->count)) - u->scm_stat.nr_fds -= fp->count; + atomic_sub(fp->count, &u->scm_stat.nr_fds); } /* @@ -1805,10 +1802,8 @@ restart_locked: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); - spin_lock(&other->sk_receive_queue.lock); scm_stat_add(other, skb); - __skb_queue_tail(&other->sk_receive_queue, skb); - spin_unlock(&other->sk_receive_queue.lock); + skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1910,10 +1905,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto pipe_err_free; maybe_add_creds(skb, sock, other); - spin_lock(&other->sk_receive_queue.lock); scm_stat_add(other, skb); - __skb_queue_tail(&other->sk_receive_queue, skb); - 
spin_unlock(&other->sk_receive_queue.lock); + skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2409,9 +2402,7 @@ unlock: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) { - spin_lock(&sk->sk_receive_queue.lock); scm_stat_del(sk, skb); - spin_unlock(&sk->sk_receive_queue.lock); unix_detach_fds(&scm, skb); } -- cgit v1.2.3 From e427cad6eee47e2daf207cd7a4156ae72496ee07 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 28 Feb 2020 14:45:22 +0100 Subject: net: datagram: drop 'destructor' argument from several helpers The only users for such argument are the UDP protocol and the UNIX socket family. We can safely reclaim the accounted memory directly from the UDP code and, after the previous patch, we can do scm stats accounting outside the datagram helpers. Overall this cleans up a bit some datagram-related helpers, and avoids an indirect call per packet in the UDP receive path. v1 -> v2: - call scm_stat_del() only when not peeking - Kirill - fix build issue with CONFIG_INET_ESPINTCP Signed-off-by: Paolo Abeni Reviewed-by: Kirill Tkhai Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++---------- net/core/datagram.c | 25 +++++++------------------ net/ipv4/udp.c | 14 ++++++++------ net/unix/af_unix.c | 7 +++++-- net/xfrm/espintcp.c | 2 +- 5 files changed, 23 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b50278c4bc8..21749b2cdc9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3514,23 +3514,15 @@ int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err, + unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err); + unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, diff --git a/net/core/datagram.c b/net/core/datagram.c index a78e7f864c1e..4213081c6ed3 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -166,8 +166,6 @@ done: struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), int *off, int *err, struct sk_buff **last) { @@ -198,8 +196,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, refcount_inc(&skb->users); } else { __skb_unlink(skb, queue); - if (destructor) - destructor(sk, skb); } *off = _off; return skb; @@ -212,7 +208,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, * @sk: socket * @queue: socket queue from which to receive * @flags: MSG\_ flags - * @destructor: invoked under the receive lock on successful dequeue * @off: an offset in bytes to peek skb from. 
Returns an offset * within an skb where data actually starts * @err: error code returned @@ -245,10 +240,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err, + unsigned int flags, int *off, int *err, struct sk_buff **last) { struct sk_buff *skb; @@ -269,8 +261,8 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, * However, this function was correct in any case. 8) */ spin_lock_irqsave(&queue->lock, cpu_flags); - skb = __skb_try_recv_from_queue(sk, queue, flags, destructor, - off, &error, last); + skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error, + last); spin_unlock_irqrestore(&queue->lock, cpu_flags); if (error) goto no_packet; @@ -293,10 +285,7 @@ EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err) + unsigned int flags, int *off, int *err) { struct sk_buff *skb, *last; long timeo; @@ -304,8 +293,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor, - off, err, &last); + skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err, + &last); if (skb) return skb; @@ -326,7 +315,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags | (noblock ? MSG_DONTWAIT : 0), - NULL, &off, err); + &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 08a41f1e1cd2..a68e2ac37f26 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1671,10 +1671,11 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, error = -EAGAIN; do { spin_lock_bh(&queue->lock); - skb = __skb_try_recv_from_queue(sk, queue, flags, - udp_skb_destructor, - off, err, &last); + skb = __skb_try_recv_from_queue(sk, queue, flags, off, + err, &last); if (skb) { + if (!(flags & MSG_PEEK)) + udp_skb_destructor(sk, skb); spin_unlock_bh(&queue->lock); return skb; } @@ -1692,9 +1693,10 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, queue); - skb = __skb_try_recv_from_queue(sk, queue, flags, - udp_skb_dtor_locked, - off, err, &last); + skb = __skb_try_recv_from_queue(sk, queue, flags, off, + err, &last); + if (skb && !(flags & MSG_PEEK)) + udp_skb_dtor_locked(sk, skb); spin_unlock(&sk_queue->lock); spin_unlock_bh(&queue->lock); if (skb) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c46fa271fc4a..3385a7a0b231 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2106,9 +2106,12 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, skip = sk_peek_offset(sk, flags); skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, - scm_stat_del, &skip, &err, &last); - if (skb) + &skip, &err, &last); + if (skb) { + if (!(flags & MSG_PEEK)) + scm_stat_del(sk, skb); break; + } mutex_unlock(&u->iolock); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index f15d6a564b0e..037ea156d2f9 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -100,7 +100,7 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, flags |= nonblock ? 
MSG_DONTWAIT : 0; - skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, NULL, &off, &err); + skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); if (!skb) return err; -- cgit v1.2.3 From b90feaff2a2cbf339069adec4bfd6091cfb44b50 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 27 Feb 2020 14:58:44 -0600 Subject: net: sched: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 2 +- net/sched/em_ipt.c | 2 +- net/sched/em_nbyte.c | 2 +- net/sched/sch_atm.c | 2 +- net/sched/sch_netem.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 6a70845bd9ab..20d2c6419612 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -181,7 +181,7 @@ struct tc_taprio_qopt_offload { u64 cycle_time_extension; size_t num_entries; - struct tc_taprio_sched_entry entries[0]; + struct tc_taprio_sched_entry entries[]; }; /* Reference counting */ diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index 9fff6480acc6..eecfe072c508 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -22,7 +22,7 @@ struct em_ipt_match { const struct xt_match *match; u32 hook; u8 nfproto; - u8 match_data[0] __aligned(8); + u8 match_data[] __aligned(8); }; struct em_ipt_xt_match { diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 88c7ce42df7e..2c1192a2ee5e 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -16,7 +16,7 @@ struct nbyte_data { struct tcf_em_nbyte hdr; - char pattern[0]; + char pattern[]; }; static int em_nbyte_change(struct net *net, void *data, int data_len, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index f4f9b8cdbffb..ee12ca9f55b4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -58,7 +58,7 @@ struct atm_flow_data { struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ int hdr_len; - unsigned char hdr[0]; /* header data; MUST BE LAST */ + unsigned char hdr[]; /* header data; MUST BE LAST */ }; struct atm_qdisc_data { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 42e557d48e4e..84f82771cdf5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -66,7 +66,7 @@ struct disttable { u32 size; - s16 table[0]; + s16 table[]; }; struct netem_sched_data { -- cgit v1.2.3 From f3f2f98470b7b8ead7c364c103303c3cac8c2ff0 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Feb 2020 
18:01:20 +0000 Subject: hsr: use debugfs_remove_recursive() instead of debugfs_remove() If it uses debugfs_remove_recursive() instead of debugfs_remove(), hsr_priv() doesn't need to have "node_tbl_file" pointer variable. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_debugfs.c | 5 +---- net/hsr/hsr_main.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index d5f709b940ff..9787ef11ca71 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -113,7 +113,6 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) priv->node_tbl_root = NULL; return; } - priv->node_tbl_file = de; } /* hsr_debugfs_term - Tear down debugfs intrastructure @@ -125,9 +124,7 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) void hsr_debugfs_term(struct hsr_priv *priv) { - debugfs_remove(priv->node_tbl_file); - priv->node_tbl_file = NULL; - debugfs_remove(priv->node_tbl_root); + debugfs_remove_recursive(priv->node_tbl_root); priv->node_tbl_root = NULL; } diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 754d84b217f0..7321cf8d6d2c 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -166,7 +166,6 @@ struct hsr_priv { unsigned char sup_multicast_addr[ETH_ALEN]; #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; - struct dentry *node_tbl_file; #endif }; -- cgit v1.2.3 From 13eeb5fea68e11765020b846ef692809c5fe04aa Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Feb 2020 18:01:35 +0000 Subject: hsr: use extack error message instead of netdev_info If HSR uses the extack instead of netdev_info(), users can get error messages immediately without any checking the kernel message. Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- net/hsr/hsr_device.c | 9 +++++---- net/hsr/hsr_device.h | 3 ++- net/hsr/hsr_netlink.c | 22 +++++++++++++++------- net/hsr/hsr_slave.c | 20 ++++++++++++-------- net/hsr/hsr_slave.h | 2 +- 5 files changed, 35 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c7bd6c49fadf..a48f621c2fec 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -431,7 +431,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version) + unsigned char multicast_spec, u8 protocol_version, + struct netlink_ext_ack *extack) { struct hsr_priv *hsr; struct hsr_port *port; @@ -478,7 +479,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); - res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); + res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER, extack); if (res) goto err_add_master; @@ -486,11 +487,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; - res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A); + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) goto err_add_slaves; - res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B); + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) goto err_add_slaves; diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 6d7759c4f5f9..a099d7de7e79 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -13,7 +13,8 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version); + unsigned char multicast_spec, u8 protocol_version, + struct netlink_ext_ack *extack); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 8dc0547f01d0..7ed308a0c035 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -35,26 +35,34 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, unsigned char multicast_spec, hsr_version; if (!data) { - netdev_info(dev, "HSR: No slave devices specified\n"); + NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); return -EINVAL; } if (!data[IFLA_HSR_SLAVE1]) { - netdev_info(dev, "HSR: Slave1 device not specified\n"); + NL_SET_ERR_MSG_MOD(extack, "Slave1 device not specified"); return -EINVAL; } link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); + if (!link[0]) { + NL_SET_ERR_MSG_MOD(extack, "Slave1 does not exist"); + return -EINVAL; + } if (!data[IFLA_HSR_SLAVE2]) { - netdev_info(dev, "HSR: Slave2 device not specified\n"); + NL_SET_ERR_MSG_MOD(extack, "Slave2 device not specified"); return -EINVAL; } link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); + if (!link[1]) { + NL_SET_ERR_MSG_MOD(extack, "Slave2 does not exist"); + return -EINVAL; + } - if (!link[0] || !link[1]) - return -ENODEV; - if (link[0] == link[1]) + if (link[0] == link[1]) { + NL_SET_ERR_MSG_MOD(extack, "Slave1 and Slave2 are same"); return -EINVAL; + } if (!data[IFLA_HSR_MULTICAST_SPEC]) multicast_spec = 0; @@ -66,7 +74,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else hsr_version 
= nla_get_u8(data[IFLA_HSR_VERSION]); - return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); + return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack); } static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index fbfd0db182b7..127ebcc0e28f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -58,33 +58,37 @@ bool hsr_port_exists(const struct net_device *dev) return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame; } -static int hsr_check_dev_ok(struct net_device *dev) +static int hsr_check_dev_ok(struct net_device *dev, + struct netlink_ext_ack *extack) { /* Don't allow HSR on non-ethernet like devices */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) { - netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot use loopback or non-ethernet device as HSR slave."); return -EINVAL; } /* Don't allow enslaving hsr devices */ if (is_hsr_master(dev)) { - netdev_info(dev, "Cannot create trees of HSR devices.\n"); + NL_SET_ERR_MSG_MOD(extack, + "Cannot create trees of HSR devices."); return -EINVAL; } if (hsr_port_exists(dev)) { - netdev_info(dev, "This device is already a HSR slave.\n"); + NL_SET_ERR_MSG_MOD(extack, + "This device is already a HSR slave."); return -EINVAL; } if (is_vlan_dev(dev)) { - netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); + NL_SET_ERR_MSG_MOD(extack, "HSR on top of VLAN is not yet supported in this driver."); return -EINVAL; } if (dev->priv_flags & IFF_DONT_BRIDGE) { - netdev_info(dev, "This device does not support bridging.\n"); + NL_SET_ERR_MSG_MOD(extack, + "This device does not support bridging."); return -EOPNOTSUPP; } @@ -126,13 +130,13 @@ fail_promiscuity: } int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, - enum hsr_port_type type) + enum hsr_port_type type, struct netlink_ext_ack *extack) { struct hsr_port *port, *master; int res; if (type != HSR_PT_MASTER) { - res = hsr_check_dev_ok(dev); + res = hsr_check_dev_ok(dev, extack); if (res) return res; } diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h index 64b549529592..8953ea279ce9 100644 --- a/net/hsr/hsr_slave.h +++ b/net/hsr/hsr_slave.h @@ -13,7 +13,7 @@ #include "hsr_main.h" int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, - enum hsr_port_type pt); + enum hsr_port_type pt, struct netlink_ext_ack *extack); void hsr_del_port(struct hsr_port *port); bool hsr_port_exists(const struct net_device *dev); -- cgit v1.2.3 From 4b793acdca0050739b99ace6a8b9e7f717f57c6b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Feb 2020 18:01:46 +0000 Subject: hsr: use netdev_err() instead of WARN_ONCE() When HSR interface is sending a frame, it finds a node with the destination ethernet address from the list. If there is no node, it calls WARN_ONCE(). But, using WARN_ONCE() for this situation is a little bit overdoing. So, in this patch, the netdev_err() is used instead. Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 3ba7f61be107..d46d22c7105c 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -318,7 +318,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - WARN_ONCE(1, "%s: Unknown node\n", __func__); + if (net_ratelimit()) + netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } if (port->type != node_dst->addr_B_port) -- cgit v1.2.3 From 81390d0c4e56ac7752c97d7e8209357673d1a8ab Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Feb 2020 18:01:56 +0000 Subject: hsr: remove unnecessary rcu_read_lock() in hsr module In order to access the port list, the hsr_port_get_hsr() is used. And this is protected by RTNL and RCU. The hsr_fill_info(), hsr_check_carrier(), hsr_dev_open() and hsr_get_max_mtu() are protected by RTNL. So, rcu_read_lock() in these functions are not necessary. The hsr_handle_frame() also uses rcu_read_lock() but this function is called by packet path. It's already protected by RCU. So, the rcu_read_lock() in hsr_handle_frame() can be removed. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 23 +++++++---------------- net/hsr/hsr_netlink.c | 27 +++++++++------------------ net/hsr/hsr_slave.c | 3 --- 3 files changed, 16 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index a48f621c2fec..00532d14fc7c 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -57,24 +57,19 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) static bool hsr_check_carrier(struct hsr_port *master) { struct hsr_port *port; - bool has_carrier; - has_carrier = false; + ASSERT_RTNL(); - rcu_read_lock(); - hsr_for_each_port(master->hsr, port) + hsr_for_each_port(master->hsr, port) { if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) { - has_carrier = true; - break; + netif_carrier_on(master->dev); + return true; } - rcu_read_unlock(); + } - if (has_carrier) - netif_carrier_on(master->dev); - else - netif_carrier_off(master->dev); + netif_carrier_off(master->dev); - return has_carrier; + return false; } static void hsr_check_announce(struct net_device *hsr_dev, @@ -118,11 +113,9 @@ int hsr_get_max_mtu(struct hsr_priv *hsr) struct hsr_port *port; mtu_max = ETH_DATA_LEN; - rcu_read_lock(); hsr_for_each_port(hsr, port) if (port->type != HSR_PT_MASTER) mtu_max = min(port->dev->mtu, mtu_max); - rcu_read_unlock(); if (mtu_max < HSR_HLEN) return 0; @@ -157,7 +150,6 @@ static int hsr_dev_open(struct net_device *dev) hsr = netdev_priv(dev); designation = '\0'; - rcu_read_lock(); hsr_for_each_port(hsr, port) { if (port->type == HSR_PT_MASTER) continue; @@ -175,7 +167,6 @@ static int hsr_dev_open(struct net_device *dev) netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", designation, port->dev->name); } - rcu_read_unlock(); if (designation == '\0') netdev_warn(dev, "No slave devices configured\n"); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 7ed308a0c035..64d39c1e93a2 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -79,29 +79,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct hsr_priv 
*hsr; + struct hsr_priv *hsr = netdev_priv(dev); struct hsr_port *port; - int res; - - hsr = netdev_priv(dev); - - res = 0; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); - if (port) - res = nla_put_u32(skb, IFLA_HSR_SLAVE1, port->dev->ifindex); - rcu_read_unlock(); - if (res) - goto nla_put_failure; + if (port) { + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, port->dev->ifindex)) + goto nla_put_failure; + } - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); - if (port) - res = nla_put_u32(skb, IFLA_HSR_SLAVE2, port->dev->ifindex); - rcu_read_unlock(); - if (res) - goto nla_put_failure; + if (port) { + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, port->dev->ifindex)) + goto nla_put_failure; + } if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, hsr->sup_multicast_addr) || diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 127ebcc0e28f..07edc7f626fe 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -25,7 +25,6 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; } - rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); if (!port) goto finish_pass; @@ -45,11 +44,9 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) hsr_forward_skb(skb, port); finish_consume: - rcu_read_unlock(); /* hsr->node_db, hsr->ports */ return RX_HANDLER_CONSUMED; finish_pass: - rcu_read_unlock(); /* hsr->node_db, hsr->ports */ return RX_HANDLER_PASS; } -- cgit v1.2.3 From e0a4b99773d3d8d3fb40087805f8fd858a23e582 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 28 Feb 2020 18:02:10 +0000 Subject: hsr: use upper/lower device infrastructure netdev_upper_dev_link() is useful to manage lower/upper interfaces. And this function internally validates looping, maximum depth. All or most virtual interfaces that could have a real interface (e.g. macsec, macvlan, ipvlan etc.) use lower/upper infrastructure. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 32 ++++++++++++++++++++------------ net/hsr/hsr_main.c | 3 ++- net/hsr/hsr_slave.c | 35 ++++++++++++++++++----------------- 3 files changed, 40 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 00532d14fc7c..fc7027314ad8 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -341,22 +341,33 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } +static void hsr_del_ports(struct hsr_priv *hsr) +{ + struct hsr_port *port; + + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (port) + hsr_del_port(port); + + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) + hsr_del_port(port); + + port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + if (port) + hsr_del_port(port); +} + /* This has to be called after all the readers are gone. * Otherwise we would have to check the return value of * hsr_port_get_hsr(). 
*/ static void hsr_dev_destroy(struct net_device *hsr_dev) { - struct hsr_priv *hsr; - struct hsr_port *port; - struct hsr_port *tmp; - - hsr = netdev_priv(hsr_dev); + struct hsr_priv *hsr = netdev_priv(hsr_dev); hsr_debugfs_term(hsr); - - list_for_each_entry_safe(port, tmp, &hsr->ports, port_list) - hsr_del_port(port); + hsr_del_ports(hsr); del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); @@ -426,8 +437,6 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], struct netlink_ext_ack *extack) { struct hsr_priv *hsr; - struct hsr_port *port; - struct hsr_port *tmp; int res; hsr = netdev_priv(hsr_dev); @@ -494,8 +503,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], err_add_slaves: unregister_netdevice(hsr_dev); err_unregister: - list_for_each_entry_safe(port, tmp, &hsr->ports, port_list) - hsr_del_port(port); + hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 9e389accbfc7..26d6c39f24e1 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -85,7 +85,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, master->dev->mtu = mtu_max; break; case NETDEV_UNREGISTER: - hsr_del_port(port); + if (!is_hsr_master(dev)) + hsr_del_port(port); break; case NETDEV_PRE_TYPE_CHANGE: /* HSR works only on Ethernet devices. Refuse slave to change diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 07edc7f626fe..123605cb5420 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -97,19 +97,25 @@ static int hsr_check_dev_ok(struct net_device *dev, } /* Setup device to be added to the HSR bridge. */ -static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port) +static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev, + struct hsr_port *port, + struct netlink_ext_ack *extack) + { + struct net_device *hsr_dev; + struct hsr_port *master; int res; - dev_hold(dev); res = dev_set_promiscuity(dev, 1); if (res) goto fail_promiscuity; - /* FIXME: - * What does net device "adjacency" mean? Should we do - * res = netdev_master_upper_dev_link(port->dev, port->hsr->dev); ? - */ + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + hsr_dev = master->dev; + + res = netdev_upper_dev_link(dev, hsr_dev, extack); + if (res) + goto fail_upper_dev_link; res = netdev_rx_handler_register(dev, hsr_handle_frame, port); if (res) @@ -119,6 +125,8 @@ static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port) return 0; fail_rx_handler: + netdev_upper_dev_unlink(dev, hsr_dev); +fail_upper_dev_link: dev_set_promiscuity(dev, -1); fail_promiscuity: dev_put(dev); @@ -147,7 +155,7 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, return -ENOMEM; if (type != HSR_PT_MASTER) { - res = hsr_portdev_setup(dev, port); + res = hsr_portdev_setup(hsr, dev, port, extack); if (res) goto fail_dev_setup; } @@ -180,21 +188,14 @@ void hsr_del_port(struct hsr_port *port) list_del_rcu(&port->port_list); if (port != master) { - if (master) { - netdev_update_features(master->dev); - dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); - } + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); netdev_rx_handler_unregister(port->dev); dev_set_promiscuity(port->dev, -1); + netdev_upper_dev_unlink(port->dev, master->dev); } - /* FIXME? 
- * netdev_upper_dev_unlink(port->dev, port->hsr->dev); - */ - synchronize_rcu(); - if (port != master) - dev_put(port->dev); kfree(port); } -- cgit v1.2.3 From 70ae1e127b486704c62d3537d69ca65c446d4d83 Mon Sep 17 00:00:00 2001 From: Cris Forno Date: Fri, 28 Feb 2020 14:12:04 -0600 Subject: ethtool: Factored out similar ethtool link settings for virtual devices to core Three virtual devices (ibmveth, virtio_net, and netvsc) all have similar code to set link settings and validate ethtool command. To eliminate duplication of code, it is factored out into core/ethtool.c. Signed-off-by: Cris Forno Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 ++++++ net/ethtool/ioctl.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 95991e4300bf..23373978cb3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -420,4 +420,10 @@ struct ethtool_rx_flow_rule * ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); +bool ethtool_virtdev_validate_cmd(const struct ethtool_link_ksettings *cmd); +int ethtool_virtdev_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd, + u32 *dev_speed, u8 *dev_duplex); + + #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b987052d91ef..f2fe8e5896dc 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -459,6 +459,24 @@ static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, return 0; } +/* Check if the user is trying to change anything besides speed/duplex */ +bool ethtool_virtdev_validate_cmd(const struct ethtool_link_ksettings *cmd) +{ + struct ethtool_link_settings base2 = {}; + + base2.speed = cmd->base.speed; + base2.port = PORT_OTHER; + base2.duplex = cmd->base.duplex; + base2.cmd = cmd->base.cmd; + base2.link_mode_masks_nwords = cmd->base.link_mode_masks_nwords; + + return !memcmp(&base2, &cmd->base, sizeof(base2)) && + bitmap_empty(cmd->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS) && + bitmap_empty(cmd->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + /* convert a kernel internal ethtool_link_ksettings to * ethtool_link_usettings in user space. return 0 on success, errno on * error. @@ -581,6 +599,27 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return err; } +int ethtool_virtdev_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd, + u32 *dev_speed, u8 *dev_duplex) +{ + u32 speed; + u8 duplex; + + speed = cmd->base.speed; + duplex = cmd->base.duplex; + /* don't allow custom speed and duplex */ + if (!ethtool_validate_speed(speed) || + !ethtool_validate_duplex(duplex) || + !ethtool_virtdev_validate_cmd(cmd)) + return -EINVAL; + *dev_speed = speed; + *dev_duplex = duplex; + + return 0; +} +EXPORT_SYMBOL(ethtool_virtdev_set_link_ksettings); + /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, this is -- cgit v1.2.3 From d2f7e56d1e4042dc8a4b4b2d5e9e79f53a6f8e4b Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Tue, 3 Mar 2020 14:54:34 +0800 Subject: ipv6: Use math to point per net sysctls into the appropriate struct net The data pointers of ipv6 sysctl are set one by one which is hard to maintain, especially with kconfig. 
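The fix relies on nothing more than pointer arithmetic: a ctl_table whose .data pointers were filled in against one template instance can be re-pointed at any other instance by adding the byte distance between the two objects. A minimal userspace sketch of that idea follows; the struct and field names are invented, and the cross-object pointer subtraction is the same implementation-specific arithmetic the kernel itself uses.

	#include <stdio.h>

	struct ns_sysctl {                  /* stand-in for per-netns sysctl data */
		int bindv6only;
		int auto_flowlabels;
	};

	struct ctl_entry {                  /* stand-in for struct ctl_table */
		const char *procname;
		void *data;
	};

	static struct ns_sysctl init_ns;    /* the template instance */

	static const struct ctl_entry template[] = {
		{ "bindv6only",      &init_ns.bindv6only },
		{ "auto_flowlabels", &init_ns.auto_flowlabels },
	};

	int main(void)
	{
		struct ns_sysctl other_ns = { .bindv6only = 1, .auto_flowlabels = 7 };
		struct ctl_entry tbl[2];

		for (int i = 0; i < 2; i++) {
			tbl[i] = template[i];
			/* re-base: same field offset, different instance */
			tbl[i].data = (char *)tbl[i].data +
				      ((char *)&other_ns - (char *)&init_ns);
		}

		printf("%d %d\n", *(int *)tbl[0].data, *(int *)tbl[1].data); /* 1 7 */
		return 0;
	}

The loop in the hunk below does exactly this with (void *)net - (void *)&init_net.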
This patch simplifies it by using math to point the per net sysctls into the appropriate struct net, just like what we did for ipv4. Signed-off-by: Cambda Zhu Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index ec8fcfc60a27..63b657aa8d29 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -203,29 +203,16 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) struct ctl_table *ipv6_table; struct ctl_table *ipv6_route_table; struct ctl_table *ipv6_icmp_table; - int err; + int err, i; err = -ENOMEM; ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template), GFP_KERNEL); if (!ipv6_table) goto out; - ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; - ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; - ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; - ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; - ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; - ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; - ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; - ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; - ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; - ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect; - ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt; - ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; - ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; - ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; - ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy, - ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel; + /* Update the variables to point into the current struct net */ + for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++) + ipv6_table[i].data += (void *)net - (void *)&init_net; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3 From c34b961a249211bdb08d03bdecfb31ff22eb002f Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 3 Mar 2020 15:07:49 +0200 Subject: net/sched: act_ct: Create nf flow table per zone Use the NF flow tables infrastructure for CT offload. Create a nf flow table per zone. Next patches will add FT entries to this table, and do the software offload. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_ct.h | 2 + net/sched/Kconfig | 2 +- net/sched/act_ct.c | 134 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index a8b156402873..cf3492e2a6a4 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -25,6 +25,8 @@ struct tcf_ct_params { u16 ct_action; struct rcu_head rcu; + + struct tcf_ct_flow_table *ct_ft; }; struct tcf_ct { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index edde0e519438..bfbefb7bff9d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. 
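
Before the act_ct.c diff: conceptually, the new code keeps one refcounted flow-table object per conntrack zone, created when the first ct action in that zone appears and destroyed when the last user goes away. Ignoring the rhashtable, the zones_lock spinlock and the RCU-deferred free used in the real patch, the lifetime pattern is roughly the following hypothetical userspace sketch (all names invented):

	#include <stdio.h>
	#include <string.h>

	struct zone_ft {                    /* stand-in for tcf_ct_flow_table */
		unsigned short zone;
		unsigned int ref;
		int in_use;
	};

	#define MAX_ZONES 8
	static struct zone_ft zones[MAX_ZONES];    /* stand-in for zones_ht */

	static struct zone_ft *zone_ft_get(unsigned short zone)
	{
		struct zone_ft *slot = NULL;

		for (int i = 0; i < MAX_ZONES; i++) {
			if (zones[i].in_use && zones[i].zone == zone) {
				zones[i].ref++;    /* existing table: just take a ref */
				return &zones[i];
			}
			if (!zones[i].in_use && !slot)
				slot = &zones[i];
		}
		if (!slot)
			return NULL;
		slot->zone = zone;                 /* first user in this zone: create it */
		slot->ref = 1;
		slot->in_use = 1;
		return slot;
	}

	static void zone_ft_put(struct zone_ft *ft)
	{
		if (--ft->ref == 0)                /* last user: tear the table down */
			memset(ft, 0, sizeof(*ft));
	}

	int main(void)
	{
		struct zone_ft *a = zone_ft_get(1), *b = zone_ft_get(1);

		printf("zone 1 ref=%u\n", a->ref); /* 2: both actions share one table */
		zone_ft_put(b);
		zone_ft_put(a);                    /* table gone after the last put */
		return 0;
	}

In the patch itself the per-zone objects live in an rhashtable keyed by zone, creation and refcounting happen under zones_lock, and the final nf_flow_table_free()/kfree() is deferred through queue_rcu_work() so that concurrent readers can drain first.
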
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f685c0d73708..3321087cb93f 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include +#include #include #include #include @@ -31,6 +33,108 @@ #include #include +static struct workqueue_struct *act_ct_wq; +static struct rhashtable zones_ht; +static DEFINE_SPINLOCK(zones_lock); + +struct tcf_ct_flow_table { + struct rhash_head node; /* In zones tables */ + + struct rcu_work rwork; + struct nf_flowtable nf_ft; + u16 zone; + u32 ref; + + bool dying; +}; + +static const struct rhashtable_params zones_params = { + .head_offset = offsetof(struct tcf_ct_flow_table, node), + .key_offset = offsetof(struct tcf_ct_flow_table, zone), + .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .automatic_shrinking = true, +}; + +static struct nf_flowtable_type flowtable_ct = { + .owner = THIS_MODULE, +}; + +static int tcf_ct_flow_table_get(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft; + int err = -ENOMEM; + + spin_lock_bh(&zones_lock); + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + if (ct_ft) + goto take_ref; + + ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC); + if (!ct_ft) + goto err_alloc; + + ct_ft->zone = params->zone; + err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); + if (err) + goto err_insert; + + ct_ft->nf_ft.type = &flowtable_ct; + err = nf_flow_table_init(&ct_ft->nf_ft); + if (err) + goto err_init; + + __module_get(THIS_MODULE); +take_ref: + params->ct_ft = ct_ft; + ct_ft->ref++; + spin_unlock_bh(&zones_lock); + + return 0; + +err_init: + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); +err_insert: + kfree(ct_ft); +err_alloc: + spin_unlock_bh(&zones_lock); + return err; +} + +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) +{ + struct tcf_ct_flow_table *ct_ft; + + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, + rwork); + nf_flow_table_free(&ct_ft->nf_ft); + kfree(ct_ft); + + module_put(THIS_MODULE); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_params *params) +{ + struct tcf_ct_flow_table *ct_ft = params->ct_ft; + + spin_lock_bh(&zones_lock); + if (--params->ct_ft->ref == 0) { + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); + queue_rcu_work(act_ct_wq, &ct_ft->rwork); + } + spin_unlock_bh(&zones_lock); +} + +static int tcf_ct_flow_tables_init(void) +{ + return rhashtable_init(&zones_ht, &zones_params); +} + +static void tcf_ct_flow_tables_uninit(void) +{ + rhashtable_destroy(&zones_ht); +} + static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; @@ -207,6 +311,8 @@ static void tcf_ct_params_free(struct rcu_head *head) struct tcf_ct_params *params = container_of(head, struct tcf_ct_params, rcu); + tcf_ct_flow_table_put(params); + if (params->tmpl) nf_conntrack_put(¶ms->tmpl->ct_general); kfree(params); @@ -730,6 +836,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; + err = tcf_ct_flow_table_get(params); + if (err) + goto cleanup; + spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params = rcu_replace_pointer(c->params, params, @@ -974,12 +1084,34 @@ static struct pernet_operations ct_net_ops = { static int __init ct_init_module(void) { - return tcf_register_action(&act_ct_ops, &ct_net_ops); + int err; + + act_ct_wq = 
alloc_ordered_workqueue("act_ct_workqueue", 0); + if (!act_ct_wq) + return -ENOMEM; + + err = tcf_ct_flow_tables_init(); + if (err) + goto err_tbl_init; + + err = tcf_register_action(&act_ct_ops, &ct_net_ops); + if (err) + goto err_register; + + return 0; + +err_tbl_init: + destroy_workqueue(act_ct_wq); +err_register: + tcf_ct_flow_tables_uninit(); + return err; } static void __exit ct_cleanup_module(void) { tcf_unregister_action(&act_ct_ops, &ct_net_ops); + tcf_ct_flow_tables_uninit(); + destroy_workqueue(act_ct_wq); } module_init(ct_init_module); -- cgit v1.2.3 From 64ff70b80fd403110b67dd9f7184a604fdb0da43 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 3 Mar 2020 15:07:50 +0200 Subject: net/sched: act_ct: Offload established connections to flow table Add a ft entry when connections enter an established state and delete the connections when they leave the established state. The flow table assumes ownership of the connection. In the following patch act_ct will lookup the ct state from the FT. In future patches, drivers will register for callbacks for ft add/del events and will be able to use the information to offload the connections. Note that connection aging is managed by the FT. Signed-off-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_ct.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 3321087cb93f..2ab38431252f 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -125,6 +125,67 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params) spin_unlock_bh(&zones_lock); } +static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, + struct nf_conn *ct, + bool tcp) +{ + struct flow_offload *entry; + int err; + + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) + return; + + entry = flow_offload_alloc(ct); + if (!entry) { + WARN_ON_ONCE(1); + goto err_alloc; + } + + if (tcp) { + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + } + + err = flow_offload_add(&ct_ft->nf_ft, entry); + if (err) + goto err_add; + + return; + +err_add: + flow_offload_free(entry); +err_alloc: + clear_bit(IPS_OFFLOAD_BIT, &ct->status); +} + +static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + bool tcp = false; + + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) + return; + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + tcp = true; + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) + return; + break; + case IPPROTO_UDP: + break; + default: + return; + } + + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || + ct->status & IPS_SEQ_ADJUST) + return; + + tcf_ct_flow_table_add(ct_ft, ct, tcp); +} + static int tcf_ct_flow_tables_init(void) { return rhashtable_init(&zones_ht, &zones_params); @@ -578,6 +639,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, nf_conntrack_confirm(skb); } + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); + out_push: skb_push_rcsum(skb, nh_ofs); -- cgit v1.2.3 From 46475bb20f4ba019abf22b0db10bf55a4158852e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 3 Mar 2020 15:07:51 +0200 Subject: net/sched: act_ct: Software offload of established flows Offload nf conntrack processing by looking up the 5-tuple in the zone's flow table. 
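As a side note on what that lookup key is: the diff below adds tcf_ct_flow_table_fill_tuple_ipv4()/_ipv6() to build the tuple straight from the packet headers. The IPv4 case can be sketched in self-contained userspace C roughly as follows; this is a simplified illustration with a hand-rolled header struct, and the fragment, TTL and header-length checks from the patch are reduced to a minimum.

	#include <stdint.h>
	#include <stddef.h>
	#include <string.h>

	#define PROTO_TCP 6
	#define PROTO_UDP 17

	struct ipv4_hdr {                   /* minimal on-the-wire IPv4 header */
		uint8_t  ver_ihl;
		uint8_t  tos;
		uint16_t tot_len;
		uint16_t id;
		uint16_t frag_off;
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t check;
		uint32_t saddr;
		uint32_t daddr;
	};

	struct tuple5 {
		uint32_t saddr, daddr;      /* kept in network byte order, as in the patch */
		uint16_t sport, dport;
		uint8_t  proto;
	};

	int fill_tuple_ipv4(const uint8_t *pkt, size_t len, struct tuple5 *t)
	{
		const struct ipv4_hdr *iph = (const void *)pkt;
		size_t thoff;

		if (len < sizeof(*iph))
			return -1;
		thoff = (iph->ver_ihl & 0x0f) * 4;     /* IHL is in 32-bit words */
		if (iph->protocol != PROTO_TCP && iph->protocol != PROTO_UDP)
			return -1;                     /* only TCP/UDP can be offloaded */
		if (len < thoff + 4)
			return -1;                     /* need the two port words */

		memcpy(&t->sport, pkt + thoff, 2);     /* TCP and UDP both start  */
		memcpy(&t->dport, pkt + thoff + 2, 2); /* with source/dest port   */
		t->saddr = iph->saddr;
		t->daddr = iph->daddr;
		t->proto = iph->protocol;
		return 0;
	}

Both TCP and UDP place the source/destination port pair first in their headers, which is why a single read at the transport offset covers either protocol; the patch relies on the same layout via struct flow_ports.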
The nf conntrack module will process the packets until a connection is in established state. Once in established state, the ct state pointer (nf_conn) will be restored on the skb from a successful ft lookup. Signed-off-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_ct.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2ab38431252f..a2d5582a701e 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -186,6 +186,155 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, tcf_ct_flow_table_add(ct_ft, ct, tcp); } +static bool +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + unsigned int thoff; + struct iphdr *iph; + + if (!pskb_may_pull(skb, sizeof(*iph))) + return false; + + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + + if (ip_is_fragment(iph) || + unlikely(thoff != sizeof(struct iphdr))) + return false; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return false; + + if (iph->ttl <= 1) + return false; + + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return false; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + + return true; +} + +static bool +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return false; + + if (ip6h->hop_limit <= 1) + return false; + + thoff = sizeof(*ip6h); + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return false; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + + return true; +} + +static bool tcf_ct_flow_table_check_tcp(struct flow_offload *flow, + struct sk_buff *skb, + unsigned int thoff) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) + return false; + + tcph = (void *)(skb_network_header(skb) + thoff); + if (unlikely(tcph->fin || tcph->rst)) { + flow_offload_teardown(flow); + return false; + } + + return true; +} + +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, + struct sk_buff *skb, + u8 family) +{ + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft; + struct flow_offload_tuple_rhash *tuplehash; + struct flow_offload_tuple tuple = {}; + enum ip_conntrack_info ctinfo; + struct flow_offload *flow; + struct nf_conn *ct; + unsigned int thoff; + int ip_proto; + u8 dir; + + /* Previously seen or loopback */ + ct = nf_ct_get(skb, &ctinfo); + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) + return false; + + switch (family) { + case NFPROTO_IPV4: + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple)) + return false; + break; + case NFPROTO_IPV6: + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple)) + return false; + break; + default: + return false; + } + + tuplehash = 
flow_offload_lookup(nf_ft, &tuple); + if (!tuplehash) + return false; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + ct = flow->ct; + + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; + + thoff = ip_hdr(skb)->ihl * 4; + ip_proto = ip_hdr(skb)->protocol; + if (ip_proto == IPPROTO_TCP && + !tcf_ct_flow_table_check_tcp(flow, skb, thoff)) + return false; + + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + + return true; +} + static int tcf_ct_flow_tables_init(void) { return rhashtable_init(&zones_ht, &zones_params); @@ -554,6 +703,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, struct nf_hook_state state; int nh_ofs, err, retval; struct tcf_ct_params *p; + bool skip_add = false; struct nf_conn *ct; u8 family; @@ -603,6 +753,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); if (!cached) { + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { + skip_add = true; + goto do_nat; + } + /* Associate skb with specified zone. */ if (tmpl) { ct = nf_ct_get(skb, &ctinfo); @@ -620,6 +775,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, goto out_push; } +do_nat: ct = nf_ct_get(skb, &ctinfo); if (!ct) goto out_push; @@ -637,10 +793,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, * even if the connection is already confirmed. */ nf_conntrack_confirm(skb); + } else if (!skip_add) { + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); } - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); - out_push: skb_push_rcsum(skb, nh_ofs); -- cgit v1.2.3 From acf1ee44ca5da39755d2aa9080392eae46a0eb34 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 3 Mar 2020 08:12:42 -0600 Subject: devlink: Introduce devlink port flavour virtual Currently mlx5 PCI PF and VF devlink devices register their ports as physical port in non-representors mode. Introduce a new port flavour as virtual so that virtual devices can register 'virtual' flavour to make it more clear to users. An example of one PCI PF and 2 PCI virtual functions, each having one devlink port. $ devlink port show pci/0000:06:00.0/1: type eth netdev ens2f0 flavour physical port 0 pci/0000:06:00.2/1: type eth netdev ens2f2 flavour virtual port 0 pci/0000:06:00.3/1: type eth netdev ens2f3 flavour virtual port 0 Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index be2a2948f452..dfdffc42e87d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -187,6 +187,7 @@ enum devlink_port_flavour { * for the PCI VF. It is an internal * port that faces the PCI VF. */ + DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. 
*/ }; enum devlink_param_cmode { diff --git a/net/core/devlink.c b/net/core/devlink.c index 295d761cbfb1..e8ccea9035c8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -545,6 +545,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: + case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->phys.port_number)) return -EMSGSIZE; @@ -6806,6 +6807,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: + case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (!attrs->split) n = snprintf(name, len, "p%u", attrs->phys.port_number); else -- cgit v1.2.3 From cf62089b0edd7e74a1f474844b4d9f7b5697fb5c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 3 Mar 2020 15:05:01 -0500 Subject: bpf: Add gso_size to __sk_buff BPF programs may want to know whether an skb is gso. The canonical answer is skb_is_gso(skb), which tests that gso_size != 0. Expose this field in the same manner as gso_segs. That field itself is not a sufficient signal, as the comment in skb_shared_info makes clear: gso_segs may be zero, e.g., from dodgy sources. Also prepare net/bpf/test_run for upcoming BPF_PROG_TEST_RUN tests of the feature. Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303200503.226217-2-willemdebruijn.kernel@gmail.com --- include/uapi/linux/bpf.h | 1 + net/bpf/test_run.c | 7 +++++++ net/core/filter.c | 44 ++++++++++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8e98ced0963b..180337fae97e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3176,6 +3176,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 562443f94133..1cd7a1c2f8b2 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -277,6 +277,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* gso_segs is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), + offsetof(struct __sk_buff, gso_size))) + return -EINVAL; + + /* gso_size is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), sizeof(struct __sk_buff))) return -EINVAL; @@ -297,6 +303,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) if (__skb->gso_segs > GSO_MAX_SEGS) return -EINVAL; skb_shinfo(skb)->gso_segs = __skb->gso_segs; + skb_shinfo(skb)->gso_size = __skb->gso_size; return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index 4a08c9fb2be7..cd0a532db4e7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7139,6 +7139,27 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, + struct bpf_insn *insn) +{ + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_ALU64_REG(BPF_ADD, 
si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + + return insn; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -7461,26 +7482,21 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, gso_segs): - /* si->dst_reg = skb_shinfo(SKB); */ -#ifdef NET_SKBUFF_DATA_USES_OFFSET - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - BPF_REG_AX, si->src_reg, - offsetof(struct sk_buff, end)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, head)); - *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); -#else - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, end)); -#endif + insn = bpf_convert_shinfo_access(si, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, gso_segs, 2, target_size)); break; + case offsetof(struct __sk_buff, gso_size): + insn = bpf_convert_shinfo_access(si, insn); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_size, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4); -- cgit v1.2.3 From 1954b86016cf8522970e1b7aba801233d777ec47 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 28 Feb 2020 15:47:39 -0800 Subject: mptcp: Check connection state before attempting send MPTCP should wait for an active connection or skip sending depending on the connection state, as TCP does. This happens before the possible passthrough to a regular TCP sendmsg because the subflow's socket type (MPTCP or TCP fallback) is not known until the connection is complete. This is also relevent at disconnect time, where data should not be sent in certain MPTCP-level connection states. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a8445407d25a..07559b45eec5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -419,6 +419,15 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EOPNOTSUPP; lock_sock(sk); + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + ret = sk_stream_wait_connect(sk, &timeo); + if (ret) + goto out; + } + ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { fallback: @@ -427,8 +436,6 @@ fallback: return ret >= 0 ? ret + copied : (copied ? 
copied : ret); } - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - ssk = mptcp_subflow_get(msk); if (!ssk) { release_sock(sk); @@ -460,6 +467,7 @@ fallback: ssk_check_wmem(msk, ssk); release_sock(ssk); +out: release_sock(sk); return ret; } -- cgit v1.2.3 From 76c42a29c0eb15ea3fdc9867a0a52e53fb4fbd76 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 28 Feb 2020 15:47:40 -0800 Subject: mptcp: Use per-subflow storage for DATA_FIN sequence number Instead of reading the MPTCP-level sequence number when sending DATA_FIN, store the data in the subflow so it can be safely accessed when the subflow TCP headers are written to the packet without the MPTCP-level lock held. This also allows the MPTCP-level socket to close individual subflows without closing the MPTCP connection. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 5 ++--- net/mptcp/protocol.c | 20 +++++++++++++++++--- net/mptcp/protocol.h | 2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 45acd877bef3..90c81953ec2c 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -312,7 +312,7 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, */ ext->use_map = 1; ext->dsn64 = 1; - ext->data_seq = mptcp_sk(subflow->conn)->write_seq; + ext->data_seq = subflow->data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; } else { @@ -354,8 +354,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (mpext) opts->ext_copy = *mpext; - if (skb && tcp_fin && - subflow->conn->sk_state != TCP_ESTABLISHED) + if (skb && tcp_fin && subflow->data_fin_tx_enable) mptcp_write_data_fin(subflow, &opts->ext_copy); ret = true; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 07559b45eec5..4c075a9f7ed0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -720,7 +720,8 @@ static void mptcp_cancel_work(struct sock *sk) sock_put(sk); } -static void mptcp_subflow_shutdown(struct sock *ssk, int how) +static void mptcp_subflow_shutdown(struct sock *ssk, int how, + bool data_fin_tx_enable, u64 data_fin_tx_seq) { lock_sock(ssk); @@ -733,6 +734,14 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how) tcp_disconnect(ssk, O_NONBLOCK); break; default: + if (data_fin_tx_enable) { + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->data_fin_tx_seq = data_fin_tx_seq; + subflow->data_fin_tx_enable = 1; + } + ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); break; @@ -749,6 +758,7 @@ static void mptcp_close(struct sock *sk, long timeout) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); LIST_HEAD(conn_list); + u64 data_fin_tx_seq; lock_sock(sk); @@ -757,11 +767,15 @@ static void mptcp_close(struct sock *sk, long timeout) list_splice_init(&msk->conn_list, &conn_list); + data_fin_tx_seq = msk->write_seq; + release_sock(sk); list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + subflow->data_fin_tx_seq = data_fin_tx_seq; + subflow->data_fin_tx_enable = 1; __mptcp_close_ssk(sk, ssk, subflow, timeout); } @@ -854,7 +868,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, *err = -ENOBUFS; local_bh_enable(); release_sock(sk); - mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1); + mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0); tcp_close(newsk, 0); return NULL; } @@ -1309,7 +1323,7 @@ static int 
mptcp_shutdown(struct socket *sock, int how) mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - mptcp_subflow_shutdown(tcp_sk, how); + mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq); } out_unlock: diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6c0b2c8ab674..313558fa8185 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -125,7 +125,9 @@ struct mptcp_subflow_context { mpc_map : 1, data_avail : 1, rx_eof : 1, + data_fin_tx_enable : 1, can_ack : 1; /* only after processing the remote a key */ + u64 data_fin_tx_seq; struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ -- cgit v1.2.3 From 6d37a0b857c34e63764b127728b58444ac9e3f25 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 28 Feb 2020 15:47:41 -0800 Subject: mptcp: Only send DATA_FIN with final mapping When a DATA_FIN is sent in a MPTCP DSS option that contains a data mapping, the DATA_FIN consumes one byte of space in the mapping. In this case, the DATA_FIN should only be included in the DSS option if its sequence number aligns with the end of the mapped data. Otherwise the subflow can send an incorrect implicit sequence number for the DATA_FIN, and the DATA_ACK for that sequence number would not close the MPTCP-level connection correctly. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 90c81953ec2c..b9a8305bd934 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -304,21 +304,22 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct mptcp_ext *ext) { - ext->data_fin = 1; - if (!ext->use_map) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ + ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; ext->data_seq = subflow->data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; - } else { - /* If there's an existing DSS mapping, DATA_FIN consumes - * 1 additional byte of mapping space. + } else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) { + /* If there's an existing DSS mapping and it is the + * final mapping, DATA_FIN consumes 1 additional byte of + * mapping space. */ + ext->data_fin = 1; ext->data_len++; } } -- cgit v1.2.3 From a1dc1d6a05a730b62b45828975a088db577d3139 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 1 Mar 2020 23:03:04 -0800 Subject: net: qrtr: Respond to HELLO message Lost in the translation from the user space implementation was the detail that HELLO mesages must be exchanged between each node pair. As such the incoming HELLO must be replied to. Similar to the previous implementation no effort is made to prevent two Linux boxes from continuously sending HELLO messages back and forth, this is left to a follow up patch. say_hello() is moved, to facilitate the new call site. Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reviewed-by: Manivannan Sadhasivam Tested-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- net/qrtr/ns.c | 54 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 413228c4520e..61a58a35cc91 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -286,9 +286,38 @@ static int server_del(struct qrtr_node *node, unsigned int port) return 0; } +static int say_hello(struct sockaddr_qrtr *dest) +{ + struct qrtr_ctrl_pkt pkt; + struct msghdr msg = { }; + struct kvec iv; + int ret; + + iv.iov_base = &pkt; + iv.iov_len = sizeof(pkt); + + memset(&pkt, 0, sizeof(pkt)); + pkt.cmd = cpu_to_le32(QRTR_TYPE_HELLO); + + msg.msg_name = (struct sockaddr *)dest; + msg.msg_namelen = sizeof(*dest); + + ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); + if (ret < 0) + pr_err("failed to send hello msg\n"); + + return ret; +} + /* Announce the list of servers registered on the local node */ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq) { + int ret; + + ret = say_hello(sq); + if (ret < 0) + return ret; + return announce_servers(sq); } @@ -566,29 +595,6 @@ static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, } } -static int say_hello(void) -{ - struct qrtr_ctrl_pkt pkt; - struct msghdr msg = { }; - struct kvec iv; - int ret; - - iv.iov_base = &pkt; - iv.iov_len = sizeof(pkt); - - memset(&pkt, 0, sizeof(pkt)); - pkt.cmd = cpu_to_le32(QRTR_TYPE_HELLO); - - msg.msg_name = (struct sockaddr *)&qrtr_ns.bcast_sq; - msg.msg_namelen = sizeof(qrtr_ns.bcast_sq); - - ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); - if (ret < 0) - pr_err("failed to send hello msg\n"); - - return ret; -} - static void qrtr_ns_worker(struct work_struct *work) { const struct qrtr_ctrl_pkt *pkt; @@ -725,7 +731,7 @@ void qrtr_ns_init(struct work_struct *work) if (!qrtr_ns.workqueue) goto err_sock; - ret = say_hello(); + ret = say_hello(&qrtr_ns.bcast_sq); if (ret < 0) goto err_wq; -- cgit v1.2.3 From 71046abfffe9d34ae90c82cf9c8e44355c2e114c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 1 Mar 2020 23:03:05 -0800 Subject: net: qrtr: Fix FIXME related to qrtr_ns_init() The 2 second delay before calling qrtr_ns_init() meant that the remote processors would register as endpoints in qrtr and the say_hello() call would therefor broadcast the outgoing HELLO to them. With the HELLO handshake corrected this delay is no longer needed. Reviewed-by: Manivannan Sadhasivam Tested-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- net/qrtr/ns.c | 2 +- net/qrtr/qrtr.c | 10 +--------- net/qrtr/qrtr.h | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 61a58a35cc91..e7d0fe3f4330 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -693,7 +693,7 @@ static void qrtr_ns_data_ready(struct sock *sk) queue_work(qrtr_ns.workqueue, &qrtr_ns.work); } -void qrtr_ns_init(struct work_struct *work) +void qrtr_ns_init(void) { struct sockaddr_qrtr sq; int ret; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 423310896285..e22092e4a783 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -9,7 +9,6 @@ #include /* For TIOCINQ/OUTQ */ #include #include -#include #include @@ -110,8 +109,6 @@ static DEFINE_MUTEX(qrtr_node_lock); static DEFINE_IDR(qrtr_ports); static DEFINE_MUTEX(qrtr_port_lock); -static struct delayed_work qrtr_ns_work; - /** * struct qrtr_node - endpoint node * @ep_lock: lock for endpoint management and callbacks @@ -1263,11 +1260,7 @@ static int __init qrtr_proto_init(void) return rc; } - /* FIXME: Currently, this 2s delay is required to catch the NEW_SERVER - * messages from routers. But the fix could be somewhere else. - */ - INIT_DELAYED_WORK(&qrtr_ns_work, qrtr_ns_init); - schedule_delayed_work(&qrtr_ns_work, msecs_to_jiffies(2000)); + qrtr_ns_init(); return rc; } @@ -1275,7 +1268,6 @@ postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { - cancel_delayed_work_sync(&qrtr_ns_work); qrtr_ns_remove(); sock_unregister(qrtr_family.family); proto_unregister(&qrtr_proto); diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h index 53a237a28971..dc2b67f17927 100644 --- a/net/qrtr/qrtr.h +++ b/net/qrtr/qrtr.h @@ -29,7 +29,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -void qrtr_ns_init(struct work_struct *work); +void qrtr_ns_init(void); void qrtr_ns_remove(void); -- cgit v1.2.3 From ed11bb1f9657e95e8b79a9a49211986bde96005a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 29 Feb 2020 16:31:13 +0200 Subject: net: dsa: Add bypass operations for the flower classifier-action filter Due to the immense variety of classification keys and actions available for tc-flower, as well as due to potentially very different DSA switch capabilities, it doesn't make a lot of sense for the DSA mid layer to even attempt to interpret these. So just pass them on to the underlying switch driver. DSA implements just the standard boilerplate for binding and unbinding flow blocks to ports, since nobody wants to deal with that. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/net/dsa.h | 6 ++++++ net/dsa/slave.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7d3d84f0ef42..beeb81a532e3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -540,6 +540,12 @@ struct dsa_switch_ops { /* * TC integration */ + int (*cls_flower_add)(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); + int (*cls_flower_del)(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); + int (*cls_flower_stats)(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 088c886e609e..79d9b4384d7b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -946,6 +946,64 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, } } +static int dsa_slave_add_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_add) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_add(ds, port, cls, ingress); +} + +static int dsa_slave_del_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_del) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_del(ds, port, cls, ingress); +} + +static int dsa_slave_stats_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->cls_flower_stats) + return -EOPNOTSUPP; + + return ds->ops->cls_flower_stats(ds, port, cls, ingress); +} + +static int dsa_slave_setup_tc_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) +{ + switch (cls->command) { + case FLOW_CLS_REPLACE: + return dsa_slave_add_cls_flower(dev, cls, ingress); + case FLOW_CLS_DESTROY: + return dsa_slave_del_cls_flower(dev, cls, ingress); + case FLOW_CLS_STATS: + return dsa_slave_stats_cls_flower(dev, cls, ingress); + default: + return -EOPNOTSUPP; + } +} + static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv, bool ingress) { @@ -957,6 +1015,8 @@ static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSMATCHALL: return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return dsa_slave_setup_tc_cls_flower(dev, type_data, ingress); default: return -EOPNOTSUPP; } -- cgit v1.2.3 From 08bb4da90150e2a225f35e0f642cdc463958d696 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Tue, 3 Mar 2020 15:55:34 +0000 Subject: Bluetooth: guard against controllers sending zero'd events Some controllers have been observed to send zero'd events under some conditions. This change guards against this condition as well as adding a trace to facilitate diagnosability of this condition. 
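As a rough standalone illustration of the guard described above (the authoritative change is the small hunk in hci_event_packet() in the diff below), the idea is simply to reject an event whose event code is zero before it is matched against any pending command state. Everything in this sketch -- the fake_evt_hdr type, event_is_sane(), and the sample buffers -- is invented for illustration and is not the kernel's HCI API.

/*
 * Minimal user-space sketch of the "drop zero'd events" guard.
 * Build with: cc -Wall sketch.c -o sketch
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fake_evt_hdr {          /* stand-in for the 2-byte HCI event header */
	uint8_t evt;           /* event code; 0x00 is never a valid event */
	uint8_t plen;          /* parameter length */
};

/* Return 1 if the event may be dispatched, 0 if it must be dropped. */
static int event_is_sane(const uint8_t *buf, size_t len)
{
	struct fake_evt_hdr hdr;

	if (len < sizeof(hdr))
		return 0;
	memcpy(&hdr, buf, sizeof(hdr));

	if (hdr.evt == 0x00) {
		/* Same idea as the patch: warn and skip instead of trying
		 * to match the zero'd event against a sent command.
		 */
		fprintf(stderr, "dropping zero'd event\n");
		return 0;
	}
	return 1;
}

int main(void)
{
	uint8_t zeroed[] = { 0x00, 0x00 };       /* what a misbehaving controller sends */
	uint8_t cmd_complete[] = { 0x0e, 0x04 }; /* a normal Command Complete header */

	printf("zeroed event ok? %d\n", event_is_sane(zeroed, sizeof(zeroed)));
	printf("command complete ok? %d\n",
	       event_is_sane(cmd_complete, sizeof(cmd_complete)));
	return 0;
}
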
Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 591e7477e925..a40ed31f6eb8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5868,6 +5868,11 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; + if (!event) { + bt_dev_warn(hdev, "Received unexpected HCI Event 00000000"); + goto done; + } + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(cmd_hdr->opcode); @@ -6079,6 +6084,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) req_complete_skb(hdev, status, opcode, orig_skb); } +done: kfree_skb(orig_skb); kfree_skb(skb); hdev->stat.evt_rx++; -- cgit v1.2.3 From 07ac9d16b4a5d1cf303215ad7e9829824246be55 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 4 Mar 2020 13:49:38 +0200 Subject: net/sched: act_ct: Fix ipv6 lookup of offloaded connections When checking the protocol number tcf_ct_flow_table_lookup() handles the flow as if it's always ipv4, while it can be ipv6. Instead, refactor the code to fetch the tcp header, if available, in the relevant family (ipv4/ipv6) filler function, and do the check on the returned tcp header. Fixes: 46475bb20f4b ("net/sched: act_ct: Software offload of established flows") Signed-off-by: Paul Blakey Acked-by: Marcelo Ricardo Leitner Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_ct.c | 60 +++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a2d5582a701e..f434db750328 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -188,7 +188,8 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, static bool tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct tcphdr **tcph) { struct flow_ports *ports; unsigned int thoff; @@ -211,11 +212,16 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, if (iph->ttl <= 1) return false; - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + if (!pskb_may_pull(skb, iph->protocol == IPPROTO_TCP ? + thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) return false; - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + iph = ip_hdr(skb); + if (iph->protocol == IPPROTO_TCP) + *tcph = (void *)(skb_network_header(skb) + thoff); + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; tuple->src_port = ports->source; @@ -228,7 +234,8 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, static bool tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct tcphdr **tcph) { struct flow_ports *ports; struct ipv6hdr *ip6h; @@ -247,11 +254,16 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, return false; thoff = sizeof(*ip6h); - if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + if (!pskb_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? 
+ thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) return false; - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + ip6h = ipv6_hdr(skb); + if (ip6h->nexthdr == IPPROTO_TCP) + *tcph = (void *)(skb_network_header(skb) + thoff); + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; tuple->src_port = ports->source; @@ -262,24 +274,6 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, return true; } -static bool tcf_ct_flow_table_check_tcp(struct flow_offload *flow, - struct sk_buff *skb, - unsigned int thoff) -{ - struct tcphdr *tcph; - - if (!pskb_may_pull(skb, thoff + sizeof(*tcph))) - return false; - - tcph = (void *)(skb_network_header(skb) + thoff); - if (unlikely(tcph->fin || tcph->rst)) { - flow_offload_teardown(flow); - return false; - } - - return true; -} - static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct sk_buff *skb, u8 family) @@ -288,10 +282,9 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple_rhash *tuplehash; struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; + struct tcphdr *tcph = NULL; struct flow_offload *flow; struct nf_conn *ct; - unsigned int thoff; - int ip_proto; u8 dir; /* Previously seen or loopback */ @@ -301,11 +294,11 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, switch (family) { case NFPROTO_IPV4: - if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple)) + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) return false; break; case NFPROTO_IPV6: - if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple)) + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph)) return false; break; default: @@ -320,15 +313,14 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ct = flow->ct; + if (tcph && (unlikely(tcph->fin || tcph->rst))) { + flow_offload_teardown(flow); + return false; + } + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; - thoff = ip_hdr(skb)->ihl * 4; - ip_proto = ip_hdr(skb)->protocol; - if (ip_proto == IPPROTO_TCP && - !tcf_ct_flow_table_check_tcp(flow, skb, thoff)) - return false; - nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); -- cgit v1.2.3 From 4cc5fdec6dfe92f50ef2c6a565dfce694161c769 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 4 Mar 2020 13:49:39 +0200 Subject: net/sched: act_ct: Use pskb_network_may_pull() To make the filler functions more generic, use network relative skb pulling. Signed-off-by: Paul Blakey Acked-by: Marcelo Ricardo Leitner Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_ct.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f434db750328..23eba61f0f81 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -195,7 +195,7 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, unsigned int thoff; struct iphdr *iph; - if (!pskb_may_pull(skb, sizeof(*iph))) + if (!pskb_network_may_pull(skb, sizeof(*iph))) return false; iph = ip_hdr(skb); @@ -212,9 +212,9 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, if (iph->ttl <= 1) return false; - if (!pskb_may_pull(skb, iph->protocol == IPPROTO_TCP ? - thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? 
+ thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) return false; iph = ip_hdr(skb); @@ -241,7 +241,7 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, struct ipv6hdr *ip6h; unsigned int thoff; - if (!pskb_may_pull(skb, sizeof(*ip6h))) + if (!pskb_network_may_pull(skb, sizeof(*ip6h))) return false; ip6h = ipv6_hdr(skb); @@ -254,9 +254,9 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, return false; thoff = sizeof(*ip6h); - if (!pskb_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? - thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? + thoff + sizeof(struct tcphdr) : + thoff + sizeof(*ports))) return false; ip6h = ipv6_hdr(skb); -- cgit v1.2.3 From 90baeb9dd2656dd9fa0a66f338f22236be96e69f Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Thu, 5 Mar 2020 00:25:59 +0530 Subject: pie: use term backlog instead of qlen Remove ambiguity by using the term backlog instead of qlen when representing the queue length in bytes. Signed-off-by: Mohit P. Tahiliani Signed-off-by: Gautam Ramakrishnan Signed-off-by: Leslie Monis Signed-off-by: David S. Miller --- include/net/pie.h | 10 +++++----- net/sched/sch_pie.c | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/pie.h b/include/net/pie.h index fd5a37cb7993..24f68c1e9919 100644 --- a/include/net/pie.h +++ b/include/net/pie.h @@ -46,7 +46,7 @@ struct pie_params { * @accu_prob: accumulated drop probability * @dq_count: number of bytes dequeued in a measurement cycle * @avg_dq_rate: calculated average dq rate - * @qlen_old: queue length during previous qdelay calculation + * @backlog_old: queue backlog during previous qdelay calculation * @accu_prob_overflows: number of times accu_prob overflows */ struct pie_vars { @@ -58,7 +58,7 @@ struct pie_vars { u64 accu_prob; u64 dq_count; u32 avg_dq_rate; - u32 qlen_old; + u32 backlog_old; u8 accu_prob_overflows; }; @@ -127,12 +127,12 @@ static inline void pie_set_enqueue_time(struct sk_buff *skb) } bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, - struct pie_vars *vars, u32 qlen, u32 packet_size); + struct pie_vars *vars, u32 backlog, u32 packet_size); void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, - struct pie_vars *vars, u32 qlen); + struct pie_vars *vars, u32 backlog); void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, - u32 qlen); + u32 backlog); #endif diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 915bcdb59a9f..8a2f9f11c86f 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -31,7 +31,7 @@ struct pie_sched_data { }; bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, - struct pie_vars *vars, u32 qlen, u32 packet_size) + struct pie_vars *vars, u32 backlog, u32 packet_size) { u64 rnd; u64 local_prob = vars->prob; @@ -51,7 +51,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, /* If we have fewer than 2 mtu-sized packets, disable pie_drop_early, * similar to min_th in RED */ - if (qlen < 2 * mtu) + if (backlog < 2 * mtu) return false; /* If bytemode is turned on, use packet size to compute new @@ -215,7 +215,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, } void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, - struct pie_vars *vars, u32 qlen) + struct pie_vars *vars, u32 backlog) { psched_time_t now = psched_get_time(); u32 dtime = 0; @@ -231,7 +231,7 @@ void 
pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, vars->dq_tstamp = now; - if (qlen == 0) + if (backlog == 0) vars->qdelay = 0; if (dtime == 0) @@ -244,7 +244,7 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * we have enough packets to calculate the drain rate. Save * current time as dq_tstamp and start measurement cycle. */ - if (qlen >= QUEUE_THRESHOLD && vars->dq_count == DQCOUNT_INVALID) { + if (backlog >= QUEUE_THRESHOLD && vars->dq_count == DQCOUNT_INVALID) { vars->dq_tstamp = psched_get_time(); vars->dq_count = 0; } @@ -283,7 +283,7 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * dq_count to 0 to re-enter the if block when the next * packet is dequeued */ - if (qlen < QUEUE_THRESHOLD) { + if (backlog < QUEUE_THRESHOLD) { vars->dq_count = DQCOUNT_INVALID; } else { vars->dq_count = 0; @@ -307,7 +307,7 @@ burst_allowance_reduction: EXPORT_SYMBOL_GPL(pie_process_dequeue); void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, - u32 qlen) + u32 backlog) { psched_time_t qdelay = 0; /* in pschedtime */ psched_time_t qdelay_old = 0; /* in pschedtime */ @@ -322,7 +322,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, vars->qdelay_old = vars->qdelay; if (vars->avg_dq_rate > 0) - qdelay = (qlen << PIE_SCALE) / vars->avg_dq_rate; + qdelay = (backlog << PIE_SCALE) / vars->avg_dq_rate; else qdelay = 0; } else { @@ -330,10 +330,10 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, qdelay_old = vars->qdelay_old; } - /* If qdelay is zero and qlen is not, it means qlen is very small, + /* If qdelay is zero and backlog is not, it means backlog is very small, * so we do not update probabilty in this round. */ - if (qdelay == 0 && qlen != 0) + if (qdelay == 0 && backlog != 0) update_prob = false; /* In the algorithm, alpha and beta are between 0 and 2 with typical @@ -409,7 +409,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, vars->prob -= vars->prob / 64; vars->qdelay = qdelay; - vars->qlen_old = qlen; + vars->backlog_old = backlog; /* We restart the measurement cycle if the following conditions are met * 1. If the delay has been low for 2 consecutive Tupdate periods -- cgit v1.2.3 From 220d4ac74ed691033b6fe2bd98dc07d6bdece046 Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Thu, 5 Mar 2020 00:26:00 +0530 Subject: pie: remove unnecessary type casting In function pie_calculate_probability(), the variables alpha and beta are of type u64. The variables qdelay, qdelay_old and params->target are of type psched_time_t (which is also u64). The explicit type casting done when calculating the value for the variable delta is redundant and not required. Signed-off-by: Mohit P. Tahiliani Signed-off-by: Gautam Ramakrishnan Signed-off-by: Leslie Monis Signed-off-by: David S. 
Miller --- net/sched/sch_pie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 8a2f9f11c86f..198cfa34a00a 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -363,8 +363,8 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, } /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ - delta += alpha * (u64)(qdelay - params->target); - delta += beta * (u64)(qdelay - qdelay_old); + delta += alpha * (qdelay - params->target); + delta += beta * (qdelay - qdelay_old); oldprob = vars->prob; -- cgit v1.2.3 From 105e808c1da2a2827a4a374ae6e3003249729eec Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Thu, 5 Mar 2020 00:26:01 +0530 Subject: pie: remove pie_vars->accu_prob_overflows The variable pie_vars->accu_prob is used as an accumulator for probability values. Since probabilty values are scaled using the MAX_PROB macro denoting (2^64 - 1), pie_vars->accu_prob is likely to overflow as it is of type u64. The variable pie_vars->accu_prob_overflows counts the number of times the variable pie_vars->accu_prob overflows. The MAX_PROB macro needs to be equal to at least (2^39 - 1) in order to do precise calculations without any underflow. Thus MAX_PROB can be reduced to (2^56 - 1) without affecting the precision in calculations drastically. Doing so will eliminate the need for the variable pie_vars->accu_prob_overflows as the variable pie_vars->accu_prob will never overflow. Removing the variable pie_vars->accu_prob_overflows also reduces the size of the structure pie_vars to exactly 64 bytes. Signed-off-by: Mohit P. Tahiliani Signed-off-by: Gautam Ramakrishnan Signed-off-by: Leslie Monis Signed-off-by: David S. Miller --- include/net/pie.h | 5 +---- net/sched/sch_fq_pie.c | 1 - net/sched/sch_pie.c | 21 ++++++--------------- 3 files changed, 7 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/pie.h b/include/net/pie.h index 24f68c1e9919..1c645b76a2ed 100644 --- a/include/net/pie.h +++ b/include/net/pie.h @@ -8,7 +8,7 @@ #include #include -#define MAX_PROB U64_MAX +#define MAX_PROB (U64_MAX >> BITS_PER_BYTE) #define DTIME_INVALID U64_MAX #define QUEUE_THRESHOLD 16384 #define DQCOUNT_INVALID -1 @@ -47,7 +47,6 @@ struct pie_params { * @dq_count: number of bytes dequeued in a measurement cycle * @avg_dq_rate: calculated average dq rate * @backlog_old: queue backlog during previous qdelay calculation - * @accu_prob_overflows: number of times accu_prob overflows */ struct pie_vars { psched_time_t qdelay; @@ -59,7 +58,6 @@ struct pie_vars { u64 dq_count; u32 avg_dq_rate; u32 backlog_old; - u8 accu_prob_overflows; }; /** @@ -107,7 +105,6 @@ static inline void pie_vars_init(struct pie_vars *vars) vars->accu_prob = 0; vars->dq_count = DQCOUNT_INVALID; vars->avg_dq_rate = 0; - vars->accu_prob_overflows = 0; } static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 214657eb3dfd..a9da8776bf5b 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -189,7 +189,6 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; sel_flow->vars.accu_prob = 0; - sel_flow->vars.accu_prob_overflows = 0; __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_CN; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 198cfa34a00a..f52442d39bf5 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -62,27 
+62,19 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, else local_prob = vars->prob; - if (local_prob == 0) { + if (local_prob == 0) vars->accu_prob = 0; - vars->accu_prob_overflows = 0; - } - - if (local_prob > MAX_PROB - vars->accu_prob) - vars->accu_prob_overflows++; - - vars->accu_prob += local_prob; + else + vars->accu_prob += local_prob; - if (vars->accu_prob_overflows == 0 && - vars->accu_prob < (MAX_PROB / 100) * 85) + if (vars->accu_prob < (MAX_PROB / 100) * 85) return false; - if (vars->accu_prob_overflows == 8 && - vars->accu_prob >= MAX_PROB / 2) + if (vars->accu_prob >= (MAX_PROB / 2) * 17) return true; prandom_bytes(&rnd, 8); - if (rnd < local_prob) { + if ((rnd >> BITS_PER_BYTE) < local_prob) { vars->accu_prob = 0; - vars->accu_prob_overflows = 0; return true; } @@ -129,7 +121,6 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; q->vars.accu_prob = 0; - q->vars.accu_prob_overflows = 0; return qdisc_drop(skb, sch, to_free); } -- cgit v1.2.3 From da00d2f117a08fbca262db5ea422c80a568b112b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:52 +0100 Subject: bpf: Add test ops for BPF_PROG_TYPE_TRACING The current fexit and fentry tests rely on a different program to exercise the functions they attach to. Instead of doing this, implement the test operations for tracing which will also be used for BPF_MODIFY_RETURN in a subsequent patch. Also, clean up the fexit test to use the generated skeleton. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-7-kpsingh@chromium.org --- include/linux/bpf.h | 10 ++++ kernel/trace/bpf_trace.c | 1 + net/bpf/test_run.c | 37 +++++++++--- .../selftests/bpf/prog_tests/fentry_fexit.c | 12 +--- .../testing/selftests/bpf/prog_tests/fentry_test.c | 14 ++--- .../testing/selftests/bpf/prog_tests/fexit_test.c | 69 +++++++--------------- 6 files changed, 67 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f748b31e5888..40c53924571d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1156,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1313,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 07764c761073..363e0a2c75cf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1266,6 +1266,7 @@ const struct bpf_verifier_ops tracing_verifier_ops = { }; const struct bpf_prog_ops tracing_prog_ops = { + .test_run = bpf_prog_test_run_tracing, }; static bool raw_tp_writable_prog_is_valid_access(int off, int size, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 
1cd7a1c2f8b2..3600f098e7c6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -160,18 +160,37 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, kfree(data); return ERR_PTR(-EFAULT); } - if (bpf_fentry_test1(1) != 2 || - bpf_fentry_test2(2, 3) != 5 || - bpf_fentry_test3(4, 5, 6) != 15 || - bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || - bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) { - kfree(data); - return ERR_PTR(-EFAULT); - } + return data; } +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + int err = -EFAULT; + + switch (prog->expected_attach_type) { + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + if (bpf_fentry_test1(1) != 2 || + bpf_fentry_test2(2, 3) != 5 || + bpf_fentry_test3(4, 5, 6) != 15 || + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + goto out; + break; + default: + goto out; + } + + err = 0; +out: + trace_bpf_test_finish(&err); + return err; +} + static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) { void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 235ac4f67f5b..83493bd5745c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" #include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; - int err, pkt_fd, i; + int err, prog_fd, i; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; @@ -31,8 +26,8 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", @@ -49,7 +44,6 @@ void test_fentry_fexit(void) } close_prog: - test_pkt_access__destroy(pkt_skel); fentry_test__destroy(fentry_skel); fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 5cc06021f27d..04ebbf1cb390 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,20 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include -#include "test_pkt_access.skel.h" #include "fentry_test.skel.h" void test_fentry_test(void) { - struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; - int err, pkt_fd, i; + int err, prog_fd, i; __u32 duration = 0, 
retval; __u64 *result; - pkt_skel = test_pkt_access__open_and_load(); - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) - return; fentry_skel = fentry_test__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto cleanup; @@ -23,10 +18,10 @@ void test_fentry_test(void) if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto cleanup; - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fentry_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); @@ -39,5 +34,4 @@ void test_fentry_test(void) cleanup: fentry_test__destroy(fentry_skel); - test_pkt_access__destroy(pkt_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index d2c3655dd7a3..78d7a2765c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,64 +1,37 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include +#include "fexit_test.skel.h" void test_fexit_test(void) { - struct bpf_prog_load_attr attr = { - .file = "./fexit_test.o", - }; - - char prog_name[] = "fexit/bpf_fentry_testX"; - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[6] = {}; - struct bpf_program *prog[6]; + struct fexit_test *fexit_skel = NULL; + int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - u64 result[6]; + __u64 *result; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) - return; - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) - goto close_prog; + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; - for (i = 0; i < 6; i++) { - prog_name[sizeof(prog_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) - goto close_prog; + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + prog_fd = bpf_program__fd(fexit_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", + CHECK(err || retval, "test_run", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 6; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i + 1, result[i])) - goto close_prog; + result = 
(__u64 *)fexit_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } -close_prog: - for (i = 0; i < 6; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj); - bpf_object__close(pkt_obj); +cleanup: + fexit_test__destroy(fexit_skel); } -- cgit v1.2.3 From 3d08b6f29cf33aeaf301553d8d3805f0aa609df7 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:53 +0100 Subject: bpf: Add selftests for BPF_MODIFY_RETURN Test for two scenarios: * When the fmod_ret program returns 0, the original function should be called along with fentry and fexit programs. * When the fmod_ret program returns a non-zero value, the original function should not be called, no side effect should be observed and fentry and fexit programs should be called. The result from the kernel function call and whether a side-effect is observed is returned via the retval attr of the BPF_PROG_TEST_RUN (bpf) syscall. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-8-kpsingh@chromium.org --- net/bpf/test_run.c | 22 +++++++- .../selftests/bpf/prog_tests/modify_return.c | 65 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/modify_return.c | 49 ++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/modify_return.c create mode 100644 tools/testing/selftests/bpf/progs/modify_return.c (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 3600f098e7c6..4c921f5154e0 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,6 +10,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -143,6 +144,14 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +int noinline bpf_modify_return_test(int a, int *b) +{ + *b += 1; + return a + *b; +} + +ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { @@ -168,7 +177,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - int err = -EFAULT; + u16 side_effect = 0, ret = 0; + int b = 2, err = -EFAULT; + u32 retval = 0; switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: @@ -181,10 +192,19 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) goto out; break; + case BPF_MODIFY_RETURN: + ret = bpf_modify_return_test(1, &b); + if (b != 2) + side_effect = 1; + break; default: goto out; } + retval = ((u32)side_effect << 16) | ret; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + err = 0; out: trace_bpf_test_finish(&err); diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c new file mode 100644 index 000000000000..97fec70c600b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. 
+ */ + +#include +#include "modify_return.skel.h" + +#define LOWER(x) ((x) & 0xffff) +#define UPPER(x) ((x) >> 16) + + +static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) +{ + struct modify_return *skel = NULL; + int err, prog_fd; + __u32 duration = 0, retval; + __u16 side_effect; + __s16 ret; + + skel = modify_return__open_and_load(); + if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) + goto cleanup; + + err = modify_return__attach(skel); + if (CHECK(err, "modify_return", "attach failed: %d\n", err)) + goto cleanup; + + skel->bss->input_retval = input_retval; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, + &retval, &duration); + + CHECK(err, "test_run", "err %d errno %d\n", err, errno); + + side_effect = UPPER(retval); + ret = LOWER(retval); + + CHECK(ret != want_ret, "test_run", + "unexpected ret: %d, expected: %d\n", ret, want_ret); + CHECK(side_effect != want_side_effect, "modify_return", + "unexpected side_effect: %d\n", side_effect); + + CHECK(skel->bss->fentry_result != 1, "modify_return", + "fentry failed\n"); + CHECK(skel->bss->fexit_result != 1, "modify_return", + "fexit failed\n"); + CHECK(skel->bss->fmod_ret_result != 1, "modify_return", + "fmod_ret failed\n"); + +cleanup: + modify_return__destroy(skel); +} + +void test_modify_return(void) +{ + run_test(0 /* input_retval */, + 1 /* want_side_effect */, + 4 /* want_ret */); + run_test(-EINVAL /* input_retval */, + 0 /* want_side_effect */, + -EINVAL /* want_ret */); +} + diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c new file mode 100644 index 000000000000..8b7466a15c6b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/modify_return.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +static int sequence = 0; +__s32 input_retval = 0; + +__u64 fentry_result = 0; +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(fentry_test, int a, __u64 b) +{ + sequence++; + fentry_result = (sequence == 1); + return 0; +} + +__u64 fmod_ret_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) +{ + sequence++; + /* This is the first fmod_ret program, the ret passed should be 0 */ + fmod_ret_result = (sequence == 2 && ret == 0); + return input_retval; +} + +__u64 fexit_result = 0; +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, __u64 b, int ret) +{ + sequence++; + /* If the input_reval is non-zero a successful modification should have + * occurred. + */ + if (input_retval) + fexit_result = (sequence == 3 && ret == input_retval); + else + fexit_result = (sequence == 3 && ret == 4); + + return 0; +} -- cgit v1.2.3 From 69df578c5f4b1f9d9b649e5fb06b5d337c25d27f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 29 Feb 2020 16:50:02 +0200 Subject: net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. 
In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean Suggested-by: Allan W. Nielsen Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 7 ++-- drivers/net/ethernet/mscc/ocelot.c | 62 ++++++++++++++++++-------------- drivers/net/ethernet/mscc/ocelot_board.c | 7 ++-- include/soc/mscc/ocelot.h | 12 ++++--- net/dsa/tag_ocelot.c | 3 +- 5 files changed, 52 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index c0acd7dc7f48..3e87ac6c07e6 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -516,10 +516,11 @@ static int felix_setup(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { ocelot_init_port(ocelot, port); + /* Bring up the CPU port module and configure the NPI port */ if (dsa_is_cpu_port(ds, port)) - ocelot_set_cpu_port(ocelot, port, - OCELOT_TAG_PREFIX_NONE, - OCELOT_TAG_PREFIX_LONG); + ocelot_configure_cpu(ocelot, port, + OCELOT_TAG_PREFIX_NONE, + OCELOT_TAG_PREFIX_LONG); } /* It looks like the MAC/PCS interrupt register - PM0_IEVENT (0x8040) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ac4cf34d3af5..06f9d013f807 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1413,7 +1413,7 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) * a source for the other ports. */ for (p = 0; p < ocelot->num_phys_ports; p++) { - if (p == ocelot->cpu || (ocelot->bridge_fwd_mask & BIT(p))) { + if (ocelot->bridge_fwd_mask & BIT(p)) { unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p); for (i = 0; i < ocelot->num_phys_ports; i++) { @@ -1428,18 +1428,10 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) } } - /* Avoid the NPI port from looping back to itself */ - if (p != ocelot->cpu) - mask |= BIT(ocelot->cpu); - ocelot_write_rix(ocelot, mask, ANA_PGID_PGID, PGID_SRC + p); } else { - /* Only the CPU port, this is compatible with link - * aggregation. 
- */ - ocelot_write_rix(ocelot, - BIT(ocelot->cpu), + ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_SRC + p); } } @@ -2308,27 +2300,34 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port, } EXPORT_SYMBOL(ocelot_probe_port); -void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, - enum ocelot_tag_prefix injection, - enum ocelot_tag_prefix extraction) +/* Configure and enable the CPU port module, which is a set of queues. + * If @npi contains a valid port index, the CPU port module is connected + * to the Node Processor Interface (NPI). This is the mode through which + * frames can be injected from and extracted to an external CPU, + * over Ethernet. + */ +void ocelot_configure_cpu(struct ocelot *ocelot, int npi, + enum ocelot_tag_prefix injection, + enum ocelot_tag_prefix extraction) { - /* Configure and enable the CPU port. */ + int cpu = ocelot->num_phys_ports; + + /* The unicast destination PGID for the CPU port module is unused */ ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, cpu); + /* Instead set up a multicast destination PGID for traffic copied to + * the CPU. Whitelisted MAC addresses like the port netdevice MAC + * addresses will be copied to the CPU via this PGID. + */ ocelot_write_rix(ocelot, BIT(cpu), ANA_PGID_PGID, PGID_CPU); ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_RECV_ENA | ANA_PORT_PORT_CFG_PORTID_VAL(cpu), ANA_PORT_PORT_CFG, cpu); - /* If the CPU port is a physical port, set up the port in Node - * Processor Interface (NPI) mode. This is the mode through which - * frames can be injected from and extracted to an external CPU. - * Only one port can be an NPI at the same time. - */ - if (cpu < ocelot->num_phys_ports) { + if (npi >= 0 && npi < ocelot->num_phys_ports) { int mtu = VLAN_ETH_FRAME_LEN + OCELOT_TAG_LEN; ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | - QSYS_EXT_CPU_CFG_EXT_CPU_PORT(cpu), + QSYS_EXT_CPU_CFG_EXT_CPU_PORT(npi), QSYS_EXT_CPU_CFG); if (injection == OCELOT_TAG_PREFIX_SHORT) @@ -2336,14 +2335,27 @@ void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, else if (injection == OCELOT_TAG_PREFIX_LONG) mtu += OCELOT_LONG_PREFIX_LEN; - ocelot_port_set_mtu(ocelot, cpu, mtu); + ocelot_port_set_mtu(ocelot, npi, mtu); + + /* Enable NPI port */ + ocelot_write_rix(ocelot, + QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE | + QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(1) | + QSYS_SWITCH_PORT_MODE_PORT_ENA, + QSYS_SWITCH_PORT_MODE, npi); + /* NPI port Injection/Extraction configuration */ + ocelot_write_rix(ocelot, + SYS_PORT_MODE_INCL_XTR_HDR(extraction) | + SYS_PORT_MODE_INCL_INJ_HDR(injection), + SYS_PORT_MODE, npi); } - /* CPU port Injection/Extraction configuration */ + /* Enable CPU port module */ ocelot_write_rix(ocelot, QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE | QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(1) | QSYS_SWITCH_PORT_MODE_PORT_ENA, QSYS_SWITCH_PORT_MODE, cpu); + /* CPU port Injection/Extraction configuration */ ocelot_write_rix(ocelot, SYS_PORT_MODE_INCL_XTR_HDR(extraction) | SYS_PORT_MODE_INCL_INJ_HDR(injection), SYS_PORT_MODE, cpu); @@ -2353,10 +2365,8 @@ void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), ANA_PORT_VLAN_CFG, cpu); - - ocelot->cpu = cpu; } -EXPORT_SYMBOL(ocelot_set_cpu_port); +EXPORT_SYMBOL(ocelot_configure_cpu); int ocelot_init(struct ocelot *ocelot) { diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index c343ca5276ef..0ac9fbf77a01 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ 
b/drivers/net/ethernet/mscc/ocelot_board.c @@ -453,8 +453,6 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->ptp = 1; } - ocelot->num_cpu_ports = 1; /* 1 port on the switch, two groups */ - ports = of_get_child_by_name(np, "ethernet-ports"); if (!ports) { dev_err(&pdev->dev, "no ethernet-ports child node found\n"); @@ -471,8 +469,9 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->vcap = vsc7514_vcap_props; ocelot_init(ocelot); - ocelot_set_cpu_port(ocelot, ocelot->num_phys_ports, - OCELOT_TAG_PREFIX_NONE, OCELOT_TAG_PREFIX_NONE); + /* No NPI port */ + ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, + OCELOT_TAG_PREFIX_NONE); for_each_available_child_of_node(ports, portnp) { struct ocelot_port_private *priv; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 5b037f976245..23dd4ad31a32 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -451,9 +451,11 @@ struct ocelot { /* Keep track of the vlan port masks */ u32 vlan_mask[VLAN_N_VID]; + /* In tables like ANA:PORT and the ANA:PGID:PGID mask, + * the CPU is located after the physical ports (at the + * num_phys_ports index). + */ u8 num_phys_ports; - u8 num_cpu_ports; - u8 cpu; u32 *lags; @@ -508,9 +510,9 @@ void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, int ocelot_regfields_init(struct ocelot *ocelot, const struct reg_field *const regfields); struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res); -void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, - enum ocelot_tag_prefix injection, - enum ocelot_tag_prefix extraction); +void ocelot_configure_cpu(struct ocelot *ocelot, int npi, + enum ocelot_tag_prefix injection, + enum ocelot_tag_prefix extraction); int ocelot_init(struct ocelot *ocelot); void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 8e3e7283d430..59de1315100f 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -153,7 +153,8 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, memset(injection, 0, OCELOT_TAG_LEN); - src = dsa_upstream_port(ds, port); + /* Set the source port as the CPU port module and not the NPI port */ + src = ocelot->num_phys_ports; dest = BIT(port); bypass = true; qos_class = skb->priority; -- cgit v1.2.3 From 56dc0a0eac999b77b1d37fe83a3c0409a6066319 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 5 Mar 2020 00:02:54 +0000 Subject: hsr: fix refcnt leak of hsr slave interface In the commit e0a4b99773d3 ("hsr: use upper/lower device infrastructure"), dev_get() was removed but dev_put() in the error path wasn't removed. So, if creating hsr interface command is failed, the reference counter leak of lower interface would occur. Test commands: ip link add dummy0 type dummy ip link add ipvlan0 link dummy0 type ipvlan mode l2 ip link add ipvlan1 link dummy0 type ipvlan mode l2 ip link add hsr0 type hsr slave1 ipvlan0 slave2 ipvlan1 ip link del ipvlan0 Result: [ 633.271992][ T1280] unregister_netdevice: waiting for ipvlan0 to become free. Usage count = -1 Fixes: e0a4b99773d3 ("hsr: use upper/lower device infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: David S. 
Miller --- net/hsr/hsr_slave.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 123605cb5420..d3547e8c6d5b 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -108,7 +108,7 @@ static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev, res = dev_set_promiscuity(dev, 1); if (res) - goto fail_promiscuity; + return res; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); hsr_dev = master->dev; @@ -128,9 +128,6 @@ fail_rx_handler: netdev_upper_dev_unlink(dev, hsr_dev); fail_upper_dev_link: dev_set_promiscuity(dev, -1); -fail_promiscuity: - dev_put(dev); - return res; } -- cgit v1.2.3 From 95cddcb5cc202d3f2499596b9af5b77536c5f86a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Mar 2020 21:15:31 -0800 Subject: ethtool: add infrastructure for centralized checking of coalescing parameters Linux supports 22 different interrupt coalescing parameters. No driver implements them all. Some drivers just ignore the ones they don't support, while others have to carry a long list of checks to reject unsupported settings. To simplify the drivers add the ability to specify inside ethtool_ops which parameters are supported and let the core reject attempts to set any other one. This commit makes the mechanism an opt-in, only drivers which set ethtool_opts->coalesce_types to a non-zero value will have the checks enforced. The same mask is used for global and per queue settings. v3: - move the (temporary) check if driver defines types earlier (Michal) - rename used_types -> nonzero_params, and coalesce_types -> supported_coalesce_params (Alex) - use EOPNOTSUPP instead of EINVAL (Andrew, Michal) Leaving the long series of ifs for now, it seems nice to be able to grep for the field and flag names. This will probably have to be revisited once netlink support lands. Signed-off-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Michal Kubecek Reviewed-by: Andrew Lunn Reviewed-by: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++--- net/ethtool/ioctl.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 23373978cb3c..e464c946bca4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -177,8 +177,44 @@ void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, const unsigned long *src); +#define ETHTOOL_COALESCE_RX_USECS BIT(0) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES BIT(1) +#define ETHTOOL_COALESCE_RX_USECS_IRQ BIT(2) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ BIT(3) +#define ETHTOOL_COALESCE_TX_USECS BIT(4) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES BIT(5) +#define ETHTOOL_COALESCE_TX_USECS_IRQ BIT(6) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ BIT(7) +#define ETHTOOL_COALESCE_STATS_BLOCK_USECS BIT(8) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_RX BIT(9) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_TX BIT(10) +#define ETHTOOL_COALESCE_PKT_RATE_LOW BIT(11) +#define ETHTOOL_COALESCE_RX_USECS_LOW BIT(12) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW BIT(13) +#define ETHTOOL_COALESCE_TX_USECS_LOW BIT(14) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW BIT(15) +#define ETHTOOL_COALESCE_PKT_RATE_HIGH BIT(16) +#define ETHTOOL_COALESCE_RX_USECS_HIGH BIT(17) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH BIT(18) +#define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) +#define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) + +#define ETHTOOL_COALESCE_USECS \ + (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) +#define ETHTOOL_COALESCE_MAX_FRAMES \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES | ETHTOOL_COALESCE_TX_MAX_FRAMES) +#define ETHTOOL_COALESCE_USECS_IRQ \ + (ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_TX_USECS_IRQ) +#define ETHTOOL_COALESCE_MAX_FRAMES_IRQ \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) +#define ETHTOOL_COALESCE_USE_ADAPTIVE \ + (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) + /** * struct ethtool_ops - optional netdev operations + * @supported_coalesce_params: supported types of interrupt coalescing. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -207,8 +243,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * or zero. * @get_coalesce: Get interrupt coalescing parameters. Returns a negative * error code or zero. - * @set_coalesce: Set interrupt coalescing parameters. Returns a negative - * error code or zero. + * @set_coalesce: Set interrupt coalescing parameters. Supported coalescing + * types should be set in @supported_coalesce_params. + * Returns a negative error code or zero. * @get_ringparam: Report ring sizes * @set_ringparam: Set ring sizes. Returns a negative error code or zero. * @get_pauseparam: Report pause parameters @@ -292,7 +329,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX - * queue has this number, ignore the inapplicable fields. + * queue has this number, ignore the inapplicable fields. 
Supported + * coalescing types should be set in @supported_coalesce_params. * Returns a negative error code or zero. * @get_link_ksettings: Get various device settings including Ethernet link * settings. The %cmd and %link_mode_masks_nwords fields should be @@ -323,6 +361,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * of the generic netdev features interface. */ struct ethtool_ops { + u32 supported_coalesce_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index f2fe8e5896dc..b2684ffa26de 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1544,6 +1544,64 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, return 0; } +static bool +ethtool_set_coalesce_supported(struct net_device *dev, + struct ethtool_coalesce *coalesce) +{ + u32 supported_params = dev->ethtool_ops->supported_coalesce_params; + u32 nonzero_params = 0; + + if (!supported_params) + return true; + + if (coalesce->rx_coalesce_usecs) + nonzero_params |= ETHTOOL_COALESCE_RX_USECS; + if (coalesce->rx_max_coalesced_frames) + nonzero_params |= ETHTOOL_COALESCE_RX_MAX_FRAMES; + if (coalesce->rx_coalesce_usecs_irq) + nonzero_params |= ETHTOOL_COALESCE_RX_USECS_IRQ; + if (coalesce->rx_max_coalesced_frames_irq) + nonzero_params |= ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ; + if (coalesce->tx_coalesce_usecs) + nonzero_params |= ETHTOOL_COALESCE_TX_USECS; + if (coalesce->tx_max_coalesced_frames) + nonzero_params |= ETHTOOL_COALESCE_TX_MAX_FRAMES; + if (coalesce->tx_coalesce_usecs_irq) + nonzero_params |= ETHTOOL_COALESCE_TX_USECS_IRQ; + if (coalesce->tx_max_coalesced_frames_irq) + nonzero_params |= ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ; + if (coalesce->stats_block_coalesce_usecs) + nonzero_params |= ETHTOOL_COALESCE_STATS_BLOCK_USECS; + if (coalesce->use_adaptive_rx_coalesce) + nonzero_params |= ETHTOOL_COALESCE_USE_ADAPTIVE_RX; + if (coalesce->use_adaptive_tx_coalesce) + nonzero_params |= ETHTOOL_COALESCE_USE_ADAPTIVE_TX; + if (coalesce->pkt_rate_low) + nonzero_params |= ETHTOOL_COALESCE_PKT_RATE_LOW; + if (coalesce->rx_coalesce_usecs_low) + nonzero_params |= ETHTOOL_COALESCE_RX_USECS_LOW; + if (coalesce->rx_max_coalesced_frames_low) + nonzero_params |= ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW; + if (coalesce->tx_coalesce_usecs_low) + nonzero_params |= ETHTOOL_COALESCE_TX_USECS_LOW; + if (coalesce->tx_max_coalesced_frames_low) + nonzero_params |= ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW; + if (coalesce->pkt_rate_high) + nonzero_params |= ETHTOOL_COALESCE_PKT_RATE_HIGH; + if (coalesce->rx_coalesce_usecs_high) + nonzero_params |= ETHTOOL_COALESCE_RX_USECS_HIGH; + if (coalesce->rx_max_coalesced_frames_high) + nonzero_params |= ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH; + if (coalesce->tx_coalesce_usecs_high) + nonzero_params |= ETHTOOL_COALESCE_TX_USECS_HIGH; + if (coalesce->tx_max_coalesced_frames_high) + nonzero_params |= ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH; + if (coalesce->rate_sample_interval) + nonzero_params |= ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL; + + return (supported_params & nonzero_params) == nonzero_params; +} + static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { @@ -1555,6 +1613,9 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) return -EFAULT; + if 
(!ethtool_set_coalesce_supported(dev, &coalesce)) + return -EOPNOTSUPP; + return dev->ethtool_ops->set_coalesce(dev, &coalesce); } @@ -2336,6 +2397,11 @@ ethtool_set_per_queue_coalesce(struct net_device *dev, goto roll_back; } + if (!ethtool_set_coalesce_supported(dev, &coalesce)) { + ret = -EOPNOTSUPP; + goto roll_back; + } + ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); if (ret != 0) goto roll_back; -- cgit v1.2.3 From aaca9408078914380fbfd8aef3c38a34b515a654 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Mar 2020 09:16:40 +0200 Subject: net: sched: Make FIFO Qdisc offloadable Invoke ndo_setup_tc() as appropriate to signal init / replacement, destroying and dumping of pFIFO / bFIFO Qdisc. A lot of the FIFO logic is used for pFIFO_head_drop as well, but that's a semantically very different Qdisc that isn't really in the same boat as pFIFO / bFIFO. Split some of the functions to keep the Qdisc intact. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 15 ++++++++ net/sched/sch_fifo.c | 97 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 107 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b6fedd54cd8e..654808bfad83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -853,6 +853,7 @@ enum tc_setup_type { TC_SETUP_FT, TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, + TC_SETUP_QDISC_FIFO, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 53946b509b51..341a66af8d59 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -881,4 +881,19 @@ struct tc_tbf_qopt_offload { }; }; +enum tc_fifo_command { + TC_FIFO_REPLACE, + TC_FIFO_DESTROY, + TC_FIFO_STATS, +}; + +struct tc_fifo_qopt_offload { + enum tc_fifo_command command; + u32 handle; + u32 parent; + union { + struct tc_qopt_offload_stats stats; + }; +}; + #endif diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 37c8aa75d70c..a579a4131d22 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -12,6 +12,7 @@ #include #include #include +#include /* 1 band FIFO pseudo-"scheduler" */ @@ -51,8 +52,49 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_CN; } -static int fifo_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) +static void fifo_offload_init(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_fifo_qopt_offload qopt; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_FIFO_REPLACE; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt); +} + +static void fifo_offload_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_fifo_qopt_offload qopt; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_FIFO_DESTROY; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt); +} + +static int fifo_offload_dump(struct Qdisc *sch) +{ + struct tc_fifo_qopt_offload qopt; + + qopt.command = TC_FIFO_STATS; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.stats.bstats = &sch->bstats; + qopt.stats.qstats = &sch->qstats; + + return 
qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_FIFO, &qopt); +} + +static int __fifo_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { bool bypass; bool is_bfifo = sch->ops == &bfifo_qdisc_ops; @@ -82,10 +124,35 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt, sch->flags |= TCQ_F_CAN_BYPASS; else sch->flags &= ~TCQ_F_CAN_BYPASS; + return 0; } -static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) +static int fifo_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + int err; + + err = __fifo_init(sch, opt, extack); + if (err) + return err; + + fifo_offload_init(sch); + return 0; +} + +static int fifo_hd_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + return __fifo_init(sch, opt, extack); +} + +static void fifo_destroy(struct Qdisc *sch) +{ + fifo_offload_destroy(sch); +} + +static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tc_fifo_qopt opt = { .limit = sch->limit }; @@ -97,6 +164,22 @@ nla_put_failure: return -1; } +static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + int err; + + err = fifo_offload_dump(sch); + if (err) + return err; + + return __fifo_dump(sch, skb); +} + +static int fifo_hd_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + return __fifo_dump(sch, skb); +} + struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", .priv_size = 0, @@ -104,6 +187,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, + .destroy = fifo_destroy, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, @@ -118,6 +202,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, .init = fifo_init, + .destroy = fifo_destroy, .reset = qdisc_reset_queue, .change = fifo_init, .dump = fifo_dump, @@ -131,10 +216,10 @@ struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .init = fifo_init, + .init = fifo_hd_init, .reset = qdisc_reset_queue, - .change = fifo_init, - .dump = fifo_dump, + .change = fifo_hd_init, + .dump = fifo_hd_dump, .owner = THIS_MODULE, }; -- cgit v1.2.3 From debdedf2eb5a2d9777cabff40900772be13cd9f9 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 5 Mar 2020 16:28:39 +0100 Subject: Bluetooth: Fix calculation of SCO handle for packet processing When processing SCO packets, the handle is wrongly assumed as 16-bit value. The actual size is 12-bits and the other 4-bits are used for packet flags. 
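As a minimal sketch of the handle layout described above (the helper names here are illustrative only; the patch itself uses the kernel's existing hci_handle()/hci_flags() macros to do the same split):

/* The 16-bit value in the HCI SCO header packs a 12-bit connection
 * handle in the low bits and 4 packet-flag bits in the high bits.
 */
#include <stdint.h>

static inline uint16_t sco_handle(uint16_t h) { return h & 0x0fff; } /* low 12 bits  */
static inline uint16_t sco_flags(uint16_t h)  { return h >> 12;     } /* high 4 bits */
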
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4e6d61a95b20..6a88954e67c0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4387,13 +4387,16 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_sco_hdr *hdr = (void *) skb->data; struct hci_conn *conn; - __u16 handle; + __u16 handle, flags; skb_pull(skb, HCI_SCO_HDR_SIZE); handle = __le16_to_cpu(hdr->handle); + flags = hci_flags(handle); + handle = hci_handle(handle); - BT_DBG("%s len %d handle 0x%4.4x", hdev->name, skb->len, handle); + BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, + handle, flags); hdev->stat.sco_rx++; -- cgit v1.2.3 From 55cee73e2af555fa3dcff467683c7488af47449e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 6 Mar 2020 14:51:27 -0800 Subject: Bluetooth: Make use of skb_pull to parse L2CAP signaling PDUs This uses skb_pull when parsing signalling PDUs so skb->data for pointing to the current PDU and skb->len as the remaining bytes to be processed. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dd2021270b8a..4286483beada 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5835,9 +5835,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; - u8 *data = skb->data; - int len = skb->len; - struct l2cap_cmd_hdr cmd; + struct l2cap_cmd_hdr *cmd; int err; l2cap_raw_recv(conn, skb); @@ -5845,35 +5843,34 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (hcon->type != ACL_LINK) goto drop; - while (len >= L2CAP_CMD_HDR_SIZE) { - u16 cmd_len; - memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); - data += L2CAP_CMD_HDR_SIZE; - len -= L2CAP_CMD_HDR_SIZE; + while (skb->len >= L2CAP_CMD_HDR_SIZE) { + u16 len; + + cmd = (void *) skb->data; + skb_pull(skb, L2CAP_CMD_HDR_SIZE); - cmd_len = le16_to_cpu(cmd.len); + len = le16_to_cpu(cmd->len); - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, - cmd.ident); + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd->code, len, + cmd->ident); - if (cmd_len > len || !cmd.ident) { + if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); break; } - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); + err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; BT_ERR("Wrong link type (%d)", err); rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } - data += cmd_len; - len -= cmd_len; + skb_pull(skb, len); } drop: -- cgit v1.2.3 From 00bce3fb0642b38fa2e5db3217526c3e0d5952ca Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 5 Mar 2020 16:14:59 +0000 Subject: Bluetooth: Enable erroneous data reporting if WBS is supported This change introduces a wide band speech setting which allows higher level clients to query the local controller support for wide band speech as well as set the setting state when the radio is powered off. 
Internally, this setting controls if erroneous data reporting is enabled on the controller. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 2 +- include/net/bluetooth/hci.h | 16 +++++++++- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 4 ++- net/bluetooth/hci_core.c | 23 ++++++++++++++ net/bluetooth/hci_event.c | 39 ++++++++++++++++++++++++ net/bluetooth/mgmt.c | 65 ++++++++++++++++++++++++++++++++++++++-- 7 files changed, 145 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 48e78fdc8e83..4b12c34f0b22 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3868,7 +3868,7 @@ static int btusb_probe(struct usb_interface *intf, data->isoc = NULL; if (id->driver_info & BTUSB_WIDEBAND_SPEECH) - set_bit(HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, &hdev->quirks); + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0b3ebd35681d..4e86f1bb7a87 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -213,7 +213,7 @@ enum { * * This quirk must be set before hci_register_dev is called. */ - HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, }; /* HCI device flags */ @@ -286,6 +286,7 @@ enum { HCI_FAST_CONNECTABLE, HCI_BREDR_ENABLED, HCI_LE_SCAN_INTERRUPTED, + HCI_WIDEBAND_SPEECH_ENABLED, HCI_DUT_MODE, HCI_VENDOR_DIAG, @@ -1095,6 +1096,19 @@ struct hci_rp_read_inq_rsp_tx_power { __s8 tx_power; } __packed; +#define HCI_OP_READ_DEF_ERR_DATA_REPORTING 0x0c5a + #define ERR_DATA_REPORTING_DISABLED 0x00 + #define ERR_DATA_REPORTING_ENABLED 0x01 +struct hci_rp_read_def_err_data_reporting { + __u8 status; + __u8 err_data_reporting; +} __packed; + +#define HCI_OP_WRITE_DEF_ERR_DATA_REPORTING 0x0c5b +struct hci_cp_write_def_err_data_reporting { + __u8 err_data_reporting; +} __packed; + #define HCI_OP_SET_EVENT_MASK_PAGE_2 0x0c63 #define HCI_OP_READ_LOCATION_DATA 0x0c64 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dcc0dc6e2624..c498ac113930 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -260,6 +260,7 @@ struct hci_dev { __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; + __u8 err_data_reporting; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index f69f88e8e109..f41cd87550dc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -102,7 +102,7 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_CONFIGURATION 0x00004000 #define MGMT_SETTING_STATIC_ADDRESS 0x00008000 #define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 -#define MGMT_SETTING_WIDE_BAND_SPEECH 0x00020000 +#define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000 #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 @@ -672,6 +672,8 @@ struct mgmt_cp_set_blocked_keys { } __packed; #define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2 +#define MGMT_OP_SET_WIDEBAND_SPEECH 0x0047 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6a88954e67c0..9ce98762559b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -603,6 +603,9 @@ static int hci_init3_req(struct hci_request *req, 
unsigned long opt) if (hdev->commands[8] & 0x01) hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + if (hdev->commands[18] & 0x02) + hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); + /* Some older Broadcom based Bluetooth 1.2 controllers do not * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. @@ -838,6 +841,26 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) sizeof(support), &support); } + /* Set erroneous data reporting if supported to the wideband speech + * setting value + */ + if (hdev->commands[18] & 0x04) { + bool enabled = hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + if (enabled != + (hdev->err_data_reporting == ERR_DATA_REPORTING_ENABLED)) { + struct hci_cp_write_def_err_data_reporting cp; + + cp.err_data_reporting = enabled ? + ERR_DATA_REPORTING_ENABLED : + ERR_DATA_REPORTING_DISABLED; + + hci_req_add(req, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, + sizeof(cp), &cp); + } + } + /* Set Suggested Default Data Length to maximum if supported */ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a40ed31f6eb8..b9186026508e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -901,6 +901,37 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, hdev->inq_tx_power = rp->tx_power; } +static void hci_cc_read_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_def_err_data_reporting *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->err_data_reporting = rp->err_data_reporting; +} + +static void hci_cc_write_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *)skb->data); + struct hci_cp_write_def_err_data_reporting *cp; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING); + if (!cp) + return; + + hdev->err_data_reporting = cp->err_data_reporting; +} + static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_pin_code_reply *rp = (void *) skb->data; @@ -3302,6 +3333,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_read_inq_rsp_tx_power(hdev, skb); break; + case HCI_OP_READ_DEF_ERR_DATA_REPORTING: + hci_cc_read_def_err_data_reporting(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_ERR_DATA_REPORTING: + hci_cc_write_def_err_data_reporting(hdev, skb); + break; + case HCI_OP_PIN_CODE_REPLY: hci_cc_pin_code_reply(hdev, skb); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1002c657768a..4abb5daeeca8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -107,6 +107,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_EXT_INFO, MGMT_OP_SET_APPEARANCE, MGMT_OP_SET_BLOCKED_KEYS, + MGMT_OP_SET_WIDEBAND_SPEECH, }; static const u16 mgmt_events[] = { @@ -763,9 +764,9 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_QUIRK_WIDE_BAND_SPEECH_SUPPORTED, + if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) - settings |= MGMT_SETTING_WIDE_BAND_SPEECH; + settings |= MGMT_SETTING_WIDEBAND_SPEECH; } if (lmp_le_capable(hdev)) { @@ -850,6 +851,9 @@ static u32 get_current_settings(struct hci_dev 
*hdev) settings |= MGMT_SETTING_STATIC_ADDRESS; } + if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) + settings |= MGMT_SETTING_WIDEBAND_SPEECH; + return settings; } @@ -3593,6 +3597,62 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data, err, NULL, 0); } +static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_mode *cp = data; + int err; + bool changed = false; + + BT_DBG("request for %s", hdev->name); + + if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_NOT_SUPPORTED); + + if (cp->val != 0x00 && cp->val != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (hdev_is_powered(hdev) && + !!cp->val != hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (cp->val) + changed = !hci_dev_test_and_set_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + else + changed = hci_dev_test_and_clear_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + err = send_settings_rsp(sk, MGMT_OP_SET_WIDEBAND_SPEECH, hdev); + if (err < 0) + goto unlock; + + if (changed) + err = new_settings(hdev, sk); + +unlock: + hci_dev_unlock(hdev); + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -6994,6 +7054,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_phy_configuration, MGMT_SET_PHY_CONFIGURATION_SIZE }, { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE, HCI_MGMT_VAR_LEN }, + { set_wideband_speech, MGMT_SETTING_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.2.3 From 985048f42714823ff55f24cd422cfe7651019710 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Mar 2020 09:12:50 +0100 Subject: Bluetooth: Increment management interface revision Increment the mgmt revision due to the recently added setting and command. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4abb5daeeca8..4da48618b271 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 15 +#define MGMT_REVISION 16 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit v1.2.3 From 15f02b91056253e8cdc592888f431da0731337b8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 2 Mar 2020 16:56:20 -0800 Subject: Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode This adds the initial code for Enhanced Credit Based Mode which introduces a new socket mode called L2CAP_MODE_EXT_FLOWCTL, which for the most part work the same as L2CAP_MODE_LE_FLOWCTL but uses different PDUs to setup the connections and also works over BR/EDR. 
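A minimal sketch of the credit accounting this mode shares with LE credit-based flow control, based on the l2cap_ecred_init() hunk in the diff below; the 64-octet minimum MPS is quoted from that hunk, while the standalone function and macro names here are assumptions for illustration, not kernel symbols:

#define ECRED_MIN_MPS 64	/* minimum MPS an implementation must support */

/* Clamp the MPS and compute the initial RX credit grant: enough
 * credits for one full SDU of up to imtu octets, with one extra
 * frame to cover the SDU-length header and rounding.
 */
static unsigned int ecred_initial_rx_credits(unsigned int imtu, unsigned int *mps)
{
	if (*mps < ECRED_MIN_MPS)
		*mps = ECRED_MIN_MPS;

	return imtu / *mps + 1;
}
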
Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 4 + net/bluetooth/l2cap_core.c | 545 ++++++++++++++++++++++++++++++++++++++++-- net/bluetooth/l2cap_sock.c | 23 +- 3 files changed, 552 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 2f051d01f35d..f466cdcc6742 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -294,6 +294,8 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 #define L2CAP_CR_LE_SCID_IN_USE 0X000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B +#define L2CAP_CR_LE_INVALID_PARAMS 0X000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -962,6 +964,7 @@ void l2cap_cleanup_sockets(void); bool l2cap_is_socket(struct socket *sock); void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan); +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm); @@ -971,6 +974,7 @@ struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type); +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4286483beada..6b24db77b5df 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -535,6 +535,17 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) skb_queue_head_init(&chan->tx_q); } +static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) +{ + l2cap_le_flowctl_init(chan, tx_credits); + + /* L2CAP implementations shall support a minimum MPS of 64 octets */ + if (chan->mps < L2CAP_ECRED_MIN_MPS) { + chan->mps = L2CAP_ECRED_MIN_MPS; + chan->rx_credits = (chan->imtu / chan->mps) + 1; + } +} + void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, @@ -641,6 +652,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) break; case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: skb_queue_purge(&chan->tx_q); break; @@ -707,6 +719,27 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) &rsp); } +static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_ecred_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_LE_AUTHORIZATION; + else + result = L2CAP_CR_LE_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + memset(&rsp, 0, sizeof(rsp)); + + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + &rsp); +} + static void l2cap_chan_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -752,8 +785,16 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { if (conn->hcon->type == ACL_LINK) l2cap_chan_connect_reject(chan); - else if (conn->hcon->type == LE_LINK) - l2cap_chan_le_connect_reject(chan); + else if 
(conn->hcon->type == LE_LINK) { + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + l2cap_chan_le_connect_reject(chan); + break; + case L2CAP_MODE_EXT_FLOWCTL: + l2cap_chan_ecred_connect_reject(chan); + break; + } + } } l2cap_chan_del(chan, reason); @@ -1276,8 +1317,13 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) chan->conf_state = 0; __clear_chan_timer(chan); - if (chan->mode == L2CAP_MODE_LE_FLOWCTL && !chan->tx_credits) - chan->ops->suspend(chan); + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: + if (!chan->tx_credits) + chan->ops->suspend(chan); + break; + } chan->state = BT_CONNECTED; @@ -1309,6 +1355,31 @@ static void l2cap_le_connect(struct l2cap_chan *chan) sizeof(req), &req); } +static void l2cap_ecred_connect(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_conn_req req; + __le16 scid; + } __packed pdu; + + if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + l2cap_ecred_init(chan, 0); + + pdu.req.psm = chan->psm; + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.req.credits = cpu_to_le16(chan->rx_credits); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_REQ, + sizeof(pdu), &pdu); +} + static void l2cap_le_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -1321,8 +1392,12 @@ static void l2cap_le_start(struct l2cap_chan *chan) return; } - if (chan->state == BT_CONNECT) - l2cap_le_connect(chan); + if (chan->state == BT_CONNECT) { + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) + l2cap_ecred_connect(chan); + else + l2cap_le_connect(chan); + } } static void l2cap_start_connection(struct l2cap_chan *chan) @@ -2508,6 +2583,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; @@ -3776,6 +3852,45 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_conn *conn = chan->conn; + u16 ident = chan->ident; + int i = 0; + + if (!ident) + return; + + BT_DBG("chan %p ident %d", chan, ident); + + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + + mutex_lock(&conn->chan_lock); + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != ident) + continue; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + pdu.dcid[i++] = cpu_to_le16(chan->scid); + } + + mutex_unlock(&conn->chan_lock); + + l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); +} + void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; @@ -5718,6 +5833,351 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, return 0; } +static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_req *req = (void *) data; + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_chan *chan, *pchan; + u16 
credits, mtu, mps; + __le16 psm; + u8 result, len = 0; + int i, num_scid; + bool defer = false; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; + goto response; + } + + psm = req->psm; + credits = 0; + + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); + + memset(&pdu, 0, sizeof(pdu)); + + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); + if (!pchan) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(pchan); + + if (!smp_sufficient_security(conn->hcon, pchan->sec_level, + SMP_ALLOW_STK)) { + result = L2CAP_CR_LE_AUTHENTICATION; + goto unlock; + } + + result = L2CAP_CR_LE_SUCCESS; + cmd_len -= sizeof(req); + num_scid = cmd_len / sizeof(u16); + + for (i = 0; i < num_scid; i++) { + u16 scid = __le16_to_cpu(req->scid[i]); + + BT_DBG("scid[%d] 0x%4.4x", i, scid); + + pdu.dcid[i] = 0x0000; + len += sizeof(*pdu.dcid); + + /* Check for valid dynamic CID range */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { + result = L2CAP_CR_LE_INVALID_SCID; + continue; + } + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_LE_SCID_IN_USE; + continue; + } + + chan = pchan->ops->new_connection(pchan); + if (!chan) { + result = L2CAP_CR_LE_NO_MEM; + continue; + } + + bacpy(&chan->src, &conn->hcon->src); + bacpy(&chan->dst, &conn->hcon->dst); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); + chan->psm = psm; + chan->dcid = scid; + chan->omtu = mtu; + chan->remote_mps = mps; + + __l2cap_chan_add(conn, chan); + + l2cap_ecred_init(chan, __le16_to_cpu(req->credits)); + + /* Init response */ + if (!pdu.rsp.credits) { + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + } + + pdu.dcid[i] = cpu_to_le16(chan->scid); + + __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); + + chan->ident = cmd->ident; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + l2cap_state_change(chan, BT_CONNECT2); + defer = true; + chan->ops->defer(chan); + } else { + l2cap_chan_ready(chan); + } + } + +unlock: + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + l2cap_chan_put(pchan); + +response: + pdu.rsp.result = cpu_to_le16(result); + + if (defer) + return 0; + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + len, &pdu); + + return 0; +} + +static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + struct hci_conn *hcon = conn->hcon; + u16 mtu, mps, credits, result; + struct l2cap_chan *chan; + int err = 0, sec_level; + int i = 0; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + mtu = __le16_to_cpu(rsp->mtu); + mps = __le16_to_cpu(rsp->mps); + credits = __le16_to_cpu(rsp->credits); + result = __le16_to_cpu(rsp->result); + + BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, + result); + + mutex_lock(&conn->chan_lock); + + cmd_len -= sizeof(*rsp); + + list_for_each_entry(chan, &conn->chan_l, list) { + u16 dcid; + + if 
(chan->ident != cmd->ident || + chan->mode != L2CAP_MODE_EXT_FLOWCTL || + chan->state == BT_CONNECTED) + continue; + + l2cap_chan_lock(chan); + + /* Check that there is a dcid for each pending channel */ + if (cmd_len < sizeof(dcid)) { + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + continue; + } + + dcid = __le16_to_cpu(rsp->dcid[i++]); + cmd_len -= sizeof(u16); + + BT_DBG("dcid[%d] 0x%4.4x", i, dcid); + + /* Check if dcid is already in use */ + if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { + /* If a device receives a + * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an + * already-assigned Destination CID, then both the + * original channel and the new channel shall be + * immediately discarded and not used. + */ + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + chan = __l2cap_get_chan_by_dcid(conn, dcid); + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNRESET); + l2cap_chan_unlock(chan); + continue; + } + + switch (result) { + case L2CAP_CR_LE_AUTHENTICATION: + case L2CAP_CR_LE_ENCRYPTION: + /* If we already have MITM protection we can't do + * anything. + */ + if (hcon->sec_level > BT_SECURITY_MEDIUM) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + sec_level = hcon->sec_level + 1; + if (chan->sec_level < sec_level) + chan->sec_level = sec_level; + + /* We'll need to send a new Connect Request */ + clear_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags); + + smp_conn_security(hcon, chan->sec_level); + break; + + case L2CAP_CR_LE_BAD_PSM: + l2cap_chan_del(chan, ECONNREFUSED); + break; + + default: + /* If dcid was not set it means channels was refused */ + if (!dcid) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + chan->ident = 0; + chan->dcid = dcid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = credits; + l2cap_chan_ready(chan); + break; + } + + l2cap_chan_unlock(chan); + } + + mutex_unlock(&conn->chan_lock); + + return err; +} + +static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_reconf_req *req = (void *) data; + struct l2cap_ecred_reconf_rsp rsp; + u16 mtu, mps, result; + struct l2cap_chan *chan; + int i, num_scid; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto respond; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + BT_DBG("mtu %u mps %u", mtu, mps); + + if (mtu < L2CAP_ECRED_MIN_MTU) { + result = L2CAP_RECONF_INVALID_MTU; + goto respond; + } + + if (mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + + cmd_len -= sizeof(*req); + num_scid = cmd_len / sizeof(u16); + result = L2CAP_RECONF_SUCCESS; + + for (i = 0; i < num_scid; i++) { + u16 scid; + + scid = __le16_to_cpu(req->scid[i]); + if (!scid) + return -EPROTO; + + chan = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan) + continue; + + /* If the MTU value is decreased for any of the included + * channels, then the receiver shall disconnect all + * included channels. 
+ */ + if (chan->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan, + chan->omtu, mtu); + result = L2CAP_RECONF_INVALID_MTU; + } + + chan->omtu = mtu; + chan->remote_mps = mps; + } + +respond: + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_RECONF_RSP, sizeof(rsp), + &rsp); + + return 0; +} + +static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_chan *chan; + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + u16 result; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + result = __le16_to_cpu(rsp->result); + + BT_DBG("result 0x%4.4x", rsp->result); + + if (!result) + return 0; + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != cmd->ident) + continue; + + l2cap_chan_del(chan, ECONNRESET); + } + + return 0; +} + static inline int l2cap_le_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5773,6 +6233,22 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, err = l2cap_le_credits(conn, cmd, cmd_len, data); break; + case L2CAP_ECRED_CONN_REQ: + err = l2cap_ecred_conn_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_CONN_RSP: + err = l2cap_ecred_conn_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_REQ: + err = l2cap_ecred_reconf_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_RSP: + err = l2cap_ecred_reconf_rsp(conn, cmd, cmd_len, data); + break; + case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; @@ -6815,11 +7291,13 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits; - return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits; + return_credits = (chan->imtu / chan->mps) + 1; - if (!return_credits) + if (chan->rx_credits >= return_credits) return; + return_credits -= chan->rx_credits; + BT_DBG("chan %p returning %u credits to sender", chan, return_credits); chan->rx_credits += return_credits; @@ -6832,7 +7310,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } -static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6847,7 +7325,7 @@ static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) return err; } -static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6895,7 +7373,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } if (skb->len == sdu_len) - return l2cap_le_recv(chan, skb); + return l2cap_ecred_recv(chan, skb); chan->sdu = skb; chan->sdu_len = sdu_len; @@ -6927,7 +7405,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) skb = NULL; if (chan->sdu->len == chan->sdu_len) { - err = l2cap_le_recv(chan, chan->sdu); + err = l2cap_ecred_recv(chan, chan->sdu); if (!err) { chan->sdu = NULL; chan->sdu_last_frag = NULL; @@ -6988,7 +7466,8 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: - if (l2cap_le_data_rcv(chan, skb) < 0) + case L2CAP_MODE_EXT_FLOWCTL: + if (l2cap_ecred_data_rcv(chan, skb) < 0) goto drop; goto done; @@ -7215,8 +7694,8 @@ int l2cap_chan_connect(struct l2cap_chan *chan, 
__le16 psm, u16 cid, struct hci_dev *hdev; int err; - BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, - dst_type, __le16_to_cpu(psm)); + BT_DBG("%pMR -> %pMR (type %u) psm 0x%4.4x mode 0x%2.2x", &chan->src, + dst, dst_type, __le16_to_cpu(psm), chan->mode); hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) @@ -7244,6 +7723,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -7369,6 +7849,38 @@ done: } EXPORT_SYMBOL_GPL(l2cap_chan_connect); +static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_reconf_req req; + __le16 scid; + } pdu; + + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, + sizeof(pdu), &pdu); +} + +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) +{ + if (chan->imtu > mtu) + return -EINVAL; + + BT_DBG("chan %p mtu 0x%4.4x", chan, mtu); + + chan->imtu = mtu; + + l2cap_ecred_reconfigure(chan); + + return 0; +} + /* ---- L2CAP interface with lower layer (HCI) ---- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) @@ -7580,7 +8092,8 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } else if (chan->state == BT_CONNECT2 && - chan->mode != L2CAP_MODE_LE_FLOWCTL) { + !(chan->mode == L2CAP_MODE_EXT_FLOWCTL || + chan->mode == L2CAP_MODE_LE_FLOWCTL)) { struct l2cap_conn_rsp rsp; __u16 res, stat; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 305710446e66..44114db219e1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -232,7 +232,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - if (chan->psm && bdaddr_type_is_le(chan->src_type)) + if (chan->psm && bdaddr_type_is_le(chan->src_type) && !chan->mode) chan->mode = L2CAP_MODE_LE_FLOWCTL; err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), @@ -273,6 +273,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) switch (chan->mode) { case L2CAP_MODE_BASIC: case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -427,6 +428,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; + BT_DBG("mode 0x%2.2x", chan->mode); + len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -707,6 +710,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; } + BT_DBG("mode 0x%2.2x", chan->mode); + chan->imtu = opts.imtu; chan->omtu = opts.omtu; chan->fcs = opts.fcs; @@ -939,7 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (sk->sk_state == BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_LE_FLOWCTL && + sk->sk_state == BT_CONNECTED) { err = -EISCONN; break; } @@ -949,7 +955,12 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - chan->imtu = opt; + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && + sk->sk_state == BT_CONNECTED) + err = l2cap_chan_reconfigure(chan, opt); + else + 
chan->imtu = opt; + break; default: @@ -1004,7 +1015,11 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - if (bdaddr_type_is_le(pi->chan->src_type)) { + if (pi->chan->mode == L2CAP_MODE_EXT_FLOWCTL) { + sk->sk_state = BT_CONNECTED; + pi->chan->state = BT_CONNECTED; + __l2cap_ecred_conn_rsp_defer(pi->chan); + } if (bdaddr_type_is_le(pi->chan->src_type)) { sk->sk_state = BT_CONNECTED; pi->chan->state = BT_CONNECTED; __l2cap_le_connect_rsp_defer(pi->chan); -- cgit v1.2.3 From 4be5ca67d59d707a4b1c8608ca230ad65aa4f232 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 2 Mar 2020 16:56:21 -0800 Subject: Bluetooth: L2CAP: Add module option to enable ECRED mode This should make it safe to have the code upstream without affecting stable systems since there are a few details not sort out with ECRED mode e.g: how to initiate multiple connections at once. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 15 +++++++++++++++ net/bluetooth/l2cap_sock.c | 5 +++++ 3 files changed, 21 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index f466cdcc6742..537aaead259f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -958,6 +958,7 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan) } extern bool disable_ertm; +extern bool enable_ecred; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6b24db77b5df..697c0f7f2c1a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -45,6 +45,7 @@ #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; +bool enable_ecred; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; @@ -5849,6 +5850,9 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, int i, num_scid; bool defer = false; + if (!enable_ecred) + return -EINVAL; + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; @@ -6092,6 +6096,9 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, struct l2cap_chan *chan; int i, num_scid; + if (!enable_ecred) + return -EINVAL; + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto respond; @@ -7723,7 +7730,12 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, case L2CAP_MODE_BASIC: break; case L2CAP_MODE_LE_FLOWCTL: + break; case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -8301,3 +8313,6 @@ void l2cap_exit(void) module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); + +module_param(enable_ecred, bool, 0644); +MODULE_PARM_DESC(enable_ecred, "Enable enhanced credit flow control mode"); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 44114db219e1..0c636be3469e 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -273,7 +273,12 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) switch (chan->mode) { case L2CAP_MODE_BASIC: case L2CAP_MODE_LE_FLOWCTL: + break; case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } 
break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: -- cgit v1.2.3 From 71811cac8532b2387b3414f7cd8fe9e497482864 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 8 Mar 2020 17:45:27 +0800 Subject: Bluetooth: RFCOMM: fix ODEBUG bug in rfcomm_dev_ioctl Needn't call 'rfcomm_dlc_put' here, because 'rfcomm_dlc_exists' didn't increase dlc->refcnt. Reported-by: syzbot+4496e82090657320efc6@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Suggested-by: Hillf Danton Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/tty.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 0c7d31c6c18c..a58584949a95 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; -- cgit v1.2.3 From 319a1d19471ec49b8a91a7f6a3fe2c4535e5c279 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Mar 2020 12:40:13 +0100 Subject: flow_offload: check for basic action hw stats type Introduce flow_action_basic_hw_stats_types_check() helper and use it in drivers. That sanitizes the drivers which do not have support for action HW stats types. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 9 +++- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 8 ++- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 3 +- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c | 3 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c | 6 +++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++++ drivers/net/ethernet/mscc/ocelot_flower.c | 4 ++ drivers/net/ethernet/netronome/nfp/flower/action.c | 4 ++ drivers/net/ethernet/qlogic/qede/qede_filter.c | 10 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 9 +++- include/net/flow_offload.h | 61 ++++++++++++++++++++++ net/dsa/slave.c | 4 ++ 12 files changed, 119 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 9bec256b0934..523bf4be43cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -279,7 +279,8 @@ bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions, static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, - struct flow_action *flow_action) + struct flow_action *flow_action, + struct netlink_ext_ack *extack) { /* Used to store the L2 rewrite mask for dmac (6 bytes) followed by * smac (6 bytes) if rewrite of both is specified, otherwise either @@ -299,6 +300,9 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, return -EINVAL; } + if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -491,7 +495,8 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, flow->tun_mask.tp_src = match.mask->src; } - return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action); + return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action, + tc_flow_cmd->common.extack); } static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index bb5513bdd293..cc46277e98de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -544,7 +544,8 @@ static bool valid_pedit_action(struct net_device *dev, } int cxgb4_validate_flow_actions(struct net_device *dev, - struct flow_action *actions) + struct flow_action *actions, + struct netlink_ext_ack *extack) { struct flow_action_entry *act; bool act_redir = false; @@ -552,6 +553,9 @@ int cxgb4_validate_flow_actions(struct net_device *dev, bool act_vlan = false; int i; + if (!flow_action_basic_hw_stats_types_check(actions, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, actions) { switch (act->id) { case FLOW_ACTION_ACCEPT: @@ -642,7 +646,7 @@ int cxgb4_tc_flower_replace(struct net_device *dev, struct filter_ctx ctx; int fidx, ret; - if (cxgb4_validate_flow_actions(dev, &rule->action)) + if (cxgb4_validate_flow_actions(dev, &rule->action, extack)) return -EOPNOTSUPP; if (cxgb4_validate_flow_match(dev, cls)) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index e132516e9868..0a30c96b81ff 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -112,7 +112,8 @@ void cxgb4_process_flow_actions(struct net_device *in, struct flow_action *actions, struct ch_filter_specification *fs); int cxgb4_validate_flow_actions(struct net_device *dev, - struct flow_action *actions); + struct flow_action *actions, + struct netlink_ext_ack *extack); int cxgb4_tc_flower_replace(struct net_device *dev, struct flow_cls_offload *cls); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 1b7681a4eb32..d80dee4d316d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -286,7 +286,8 @@ int cxgb4_tc_matchall_replace(struct net_device *dev, } ret = cxgb4_validate_flow_actions(dev, - &cls_matchall->rule->action); + &cls_matchall->rule->action, + extack); if (ret) return ret; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 35478cba2aa5..0a0c6ec2336c 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1082,6 +1082,9 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port, u8 qh, ql, pmap; int index, ctx; + if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + return -EOPNOTSUPP; + memset(&c2, 0, sizeof(c2)); index = mvpp2_cls_c2_port_flow_index(port, rule->loc); @@ -1305,6 +1308,9 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule) struct flow_rule *flow = rule->flow; struct flow_action_entry *act; + if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + return -EOPNOTSUPP; + act = &flow->action.entries[0]; if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4eb2f2392d2d..cfe393cb4026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2878,6 +2878,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; + if 
(!flow_action_basic_hw_stats_types_check(flow_action, extack)) + return -EOPNOTSUPP; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; flow_action_for_each(i, act, flow_action) { @@ -3330,6 +3333,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; + if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -4148,6 +4154,9 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 8986f209e981..6d84173373c7 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -17,6 +17,10 @@ static int ocelot_flower_parse_action(struct flow_cls_offload *f, if (!flow_offload_has_one_action(&f->rule->action)) return -EOPNOTSUPP; + if (!flow_action_basic_hw_stats_types_check(&f->rule->action, + f->common.extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, a, &f->rule->action) { switch (a->id) { case FLOW_ACTION_DROP: diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index c06600fb47ff..4aa7346cb040 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -1207,6 +1207,10 @@ int nfp_flower_compile_action(struct nfp_app *app, bool pkt_host = false; u32 csum_updated = 0; + if (!flow_action_basic_hw_stats_types_check(&flow->rule->action, + extack)) + return -EOPNOTSUPP; + memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; tun_type = NFP_FL_TUNNEL_NONE; diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index d1ce4531d01a..6505f7e2d1db 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1746,7 +1746,8 @@ unlock: } static int qede_parse_actions(struct qede_dev *edev, - struct flow_action *flow_action) + struct flow_action *flow_action, + struct netlink_ext_ack *extack) { const struct flow_action_entry *act; int i; @@ -1756,6 +1757,9 @@ static int qede_parse_actions(struct qede_dev *edev, return -EINVAL; } + if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -1970,7 +1974,7 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action)) + if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) goto unlock; if (qede_flow_find_fltr(edev, &t)) { @@ -2038,7 +2042,7 @@ static int qede_flow_spec_validate(struct qede_dev *edev, return -EINVAL; } - if (qede_parse_actions(edev, flow_action)) + if (qede_parse_actions(edev, flow_action, NULL)) return -EINVAL; return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 7a01dee2f9a8..a0e6118444b0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -367,7 +367,8 @@ static int 
tc_setup_cbs(struct stmmac_priv *priv, static int tc_parse_flow_actions(struct stmmac_priv *priv, struct flow_action *action, - struct stmmac_flow_entry *entry) + struct stmmac_flow_entry *entry, + struct netlink_ext_ack *extack) { struct flow_action_entry *act; int i; @@ -375,6 +376,9 @@ static int tc_parse_flow_actions(struct stmmac_priv *priv, if (!flow_action_has_entries(action)) return -EINVAL; + if (!flow_action_basic_hw_stats_types_check(action, extack)) + return -EOPNOTSUPP; + flow_action_for_each(i, act, action) { switch (act->id) { case FLOW_ACTION_DROP: @@ -530,7 +534,8 @@ static int tc_add_flow(struct stmmac_priv *priv, return -ENOENT; } - ret = tc_parse_flow_actions(priv, &rule->action, entry); + ret = tc_parse_flow_actions(priv, &rule->action, entry, + cls->common.extack); if (ret) return ret; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 93d17f37e980..8b40f612a565 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -251,6 +252,66 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) return action->num_entries == 1; } +static inline bool +flow_action_mixed_hw_stats_types_check(const struct flow_action *action, + struct netlink_ext_ack *extack) +{ + const struct flow_action_entry *action_entry; + u8 uninitialized_var(last_hw_stats_type); + int i; + + if (flow_offload_has_one_action(action)) + return true; + + for (i = 0; i < action->num_entries; i++) { + action_entry = &action->entries[i]; + if (i && action_entry->hw_stats_type != last_hw_stats_type) { + NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); + return false; + } + last_hw_stats_type = action_entry->hw_stats_type; + } + return true; +} + +static inline const struct flow_action_entry * +flow_action_first_entry_get(const struct flow_action *action) +{ + WARN_ON(!flow_action_has_entries(action)); + return &action->entries[0]; +} + +static inline bool +flow_action_hw_stats_types_check(const struct flow_action *action, + struct netlink_ext_ack *extack, + u8 allowed_hw_stats_type) +{ + const struct flow_action_entry *action_entry; + + if (!flow_action_has_entries(action)) + return true; + if (!flow_action_mixed_hw_stats_types_check(action, extack)) + return false; + action_entry = flow_action_first_entry_get(action); + if (allowed_hw_stats_type == 0 && + action_entry->hw_stats_type != FLOW_ACTION_HW_STATS_TYPE_ANY) { + NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\""); + return false; + } else if (allowed_hw_stats_type != 0 && + action_entry->hw_stats_type != allowed_hw_stats_type) { + NL_SET_ERR_MSG_MOD(extack, "Driver does not support selected HW stats type"); + return false; + } + return true; +} + +static inline bool +flow_action_basic_hw_stats_types_check(const struct flow_action *action, + struct netlink_ext_ack *extack) +{ + return flow_action_hw_stats_types_check(action, extack, 0); +} + #define flow_action_for_each(__i, __act, __actions) \ for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i]) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 79d9b4384d7b..fca9bfa8437e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -865,6 +865,10 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!flow_offload_has_one_action(&cls->rule->action)) return err; + if (!flow_action_basic_hw_stats_types_check(&cls->rule->action, + 
cls->common.extack)) + return err; + act = &cls->rule->action.entries[0]; if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { -- cgit v1.2.3 From 44f8658017419dccbeefe64f30122fa191d0e173 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Mar 2020 12:40:20 +0100 Subject: sched: act: allow user to specify type of HW stats for a filter Currently, user who is adding an action expects HW to report stats, however it does not have exact expectations about the stats types. That is aligned with TCA_ACT_HW_STATS_TYPE_ANY. Allow user to specify the type of HW stats for an action and require it. Pass the information down to flow_offload layer. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++++ include/uapi/linux/pkt_cls.h | 22 ++++++++++++++++++++++ net/sched/act_api.c | 36 ++++++++++++++++++++++++++++++++++++ net/sched/cls_api.c | 7 +++++++ 4 files changed, 69 insertions(+) (limited to 'net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 71347a90a9d1..41337c7fc728 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -41,6 +41,7 @@ struct tc_action { struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; + u8 hw_stats_type; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -52,6 +53,9 @@ struct tc_action { #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock +#define TCA_ACT_HW_STATS_TYPE_ANY (TCA_ACT_HW_STATS_TYPE_IMMEDIATE | \ + TCA_ACT_HW_STATS_TYPE_DELAYED) + /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 449a63971451..81cc1a869588 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,6 +17,7 @@ enum { TCA_ACT_PAD, TCA_ACT_COOKIE, TCA_ACT_FLAGS, + TCA_ACT_HW_STATS_TYPE, __TCA_ACT_MAX }; @@ -24,6 +25,27 @@ enum { * actions stats. */ +/* tca HW stats type + * When user does not pass the attribute, he does not care. + * It is the same as if he would pass the attribute with + * all supported bits set. + * In case no bits are set, user is not interested in getting any HW statistics. + */ +#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out + * of date for some time, maybe + * couple of seconds. This is + * the case when driver polls + * stats updates periodically + * or when it gets async stats update + * from the device. 
+ */ + #define TCA_ACT_MAX __TCA_ACT_MAX #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) #define TCA_ACT_MAX_PRIO 32 diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8c466a712cda..aa7b737fed2e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -185,6 +185,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS_TYPE */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ @@ -788,6 +789,17 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); + if (a->hw_stats_type != TCA_ACT_HW_STATS_TYPE_ANY) { + struct nla_bitfield32 hw_stats_type = { + a->hw_stats_type, + TCA_ACT_HW_STATS_TYPE_ANY, + }; + + if (nla_put(skb, TCA_ACT_HW_STATS_TYPE, sizeof(hw_stats_type), + &hw_stats_type)) + goto nla_put_failure; + } + if (a->tcfa_flags) { struct nla_bitfield32 flags = { a->tcfa_flags, a->tcfa_flags, }; @@ -854,7 +866,23 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } +static u8 tcf_action_hw_stats_type_get(struct nlattr *hw_stats_type_attr) +{ + struct nla_bitfield32 hw_stats_type_bf; + + /* If the user did not pass the attr, that means he does + * not care about the type. Return "any" in that case + * which is setting on all supported types. + */ + if (!hw_stats_type_attr) + return TCA_ACT_HW_STATS_TYPE_ANY; + hw_stats_type_bf = nla_get_bitfield32(hw_stats_type_attr); + return hw_stats_type_bf.value; +} + static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS; +static const u32 tca_act_hw_stats_type_allowed = TCA_ACT_HW_STATS_TYPE_ANY; + static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, @@ -863,6 +891,8 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32, .validation_data = &tca_act_flags_allowed }, + [TCA_ACT_HW_STATS_TYPE] = { .type = NLA_BITFIELD32, + .validation_data = &tca_act_hw_stats_type_allowed }, }; struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -871,6 +901,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { + u8 hw_stats_type = TCA_ACT_HW_STATS_TYPE_ANY; struct nla_bitfield32 flags = { 0, 0 }; struct tc_action *a; struct tc_action_ops *a_o; @@ -903,6 +934,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, goto err_out; } } + hw_stats_type = + tcf_action_hw_stats_type_get(tb[TCA_ACT_HW_STATS_TYPE]); if (tb[TCA_ACT_FLAGS]) flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); } else { @@ -953,6 +986,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); + if (!name) + a->hw_stats_type = hw_stats_type; + /* module count goes up only when brand new policy is created * if it exists and is only bound to in a_o->init() then * ACT_P_CREATED is not returned (a zero is). 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4e766c5ab77a..e91448640a4f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3464,6 +3464,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct tc_action *act; int i, j, k, err = 0; + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_ANY != FLOW_ACTION_HW_STATS_TYPE_ANY); + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_IMMEDIATE != FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE); + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_DELAYED != FLOW_ACTION_HW_STATS_TYPE_DELAYED); + if (!exts) return 0; @@ -3476,6 +3480,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, err = tcf_act_get_cookie(entry, act); if (err) goto err_out_locked; + + entry->hw_stats_type = act->hw_stats_type; + if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { -- cgit v1.2.3 From 138470a9b2cc2e26e6018300394afc3858a54e6a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Mar 2020 14:27:48 -0700 Subject: net/sched: act_ct: fix lockdep splat in tcf_ct_flow_table_get Convert zones_lock spinlock to zones_mutex mutex, and struct (tcf_ct_flow_table)->ref to a refcount, so that control path can use regular GFP_KERNEL allocations from standard process context. This is more robust in case of memory pressure. The refcount is needed because tcf_ct_flow_table_put() can be called from RCU callback, thus in BH context. The issue was spotted by syzbot, as rhashtable_init() was called with a spinlock held, which is bad since GFP_KERNEL allocations can sleep. Note to developers : Please make sure your patches are tested with CONFIG_DEBUG_ATOMIC_SLEEP=y BUG: sleeping function called from invalid context at mm/slab.h:565 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9582, name: syz-executor610 2 locks held by syz-executor610/9582: #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:72 [inline] #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x3f9/0xad0 net/core/rtnetlink.c:5437 #1: ffffffff8a3961b8 (zones_lock){+...}, at: spin_lock_bh include/linux/spinlock.h:343 [inline] #1: ffffffff8a3961b8 (zones_lock){+...}, at: tcf_ct_flow_table_get+0xa3/0x1700 net/sched/act_ct.c:67 Preemption disabled at: [<0000000000000000>] 0x0 CPU: 0 PID: 9582 Comm: syz-executor610 Not tainted 5.6.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 ___might_sleep.cold+0x1f4/0x23d kernel/sched/core.c:6798 slab_pre_alloc_hook mm/slab.h:565 [inline] slab_alloc_node mm/slab.c:3227 [inline] kmem_cache_alloc_node_trace+0x272/0x790 mm/slab.c:3593 __do_kmalloc_node mm/slab.c:3615 [inline] __kmalloc_node+0x38/0x60 mm/slab.c:3623 kmalloc_node include/linux/slab.h:578 [inline] kvmalloc_node+0x61/0xf0 mm/util.c:574 kvmalloc include/linux/mm.h:645 [inline] kvzalloc include/linux/mm.h:653 [inline] bucket_table_alloc+0x8b/0x480 lib/rhashtable.c:175 rhashtable_init+0x3d2/0x750 lib/rhashtable.c:1054 nf_flow_table_init+0x16d/0x310 net/netfilter/nf_flow_table_core.c:498 tcf_ct_flow_table_get+0xe33/0x1700 net/sched/act_ct.c:82 tcf_ct_init+0xba4/0x18a6 net/sched/act_ct.c:1050 tcf_action_init_1+0x697/0xa20 net/sched/act_api.c:945 tcf_action_init+0x1e9/0x2f0 net/sched/act_api.c:1001 tcf_action_add+0xdb/0x370 net/sched/act_api.c:1411 tc_ctl_action+0x366/0x456 net/sched/act_api.c:1466 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5440 netlink_rcv_skb+0x15a/0x410 
net/netlink/af_netlink.c:2478 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2343 ___sys_sendmsg+0x100/0x170 net/socket.c:2397 __sys_sendmsg+0xec/0x1b0 net/socket.c:2430 do_syscall_64+0xf6/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4403d9 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffd719af218 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004403d9 RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000005 R09: 00000000004002c8 R10: 0000000000000008 R11: 00000000000 Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") Signed-off-by: Eric Dumazet Cc: Paul Blakey Cc: Jiri Pirko Reported-by: syzbot Signed-off-by: David S. Miller --- net/sched/act_ct.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 23eba61f0f81..3d9e678d7d53 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -35,15 +35,15 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; -static DEFINE_SPINLOCK(zones_lock); +static DEFINE_MUTEX(zones_mutex); struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; + refcount_t ref; u16 zone; - u32 ref; bool dying; }; @@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; - spin_lock_bh(&zones_lock); + mutex_lock(&zones_mutex); ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); - if (ct_ft) - goto take_ref; + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) + goto out_unlock; - ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC); + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL); if (!ct_ft) goto err_alloc; + refcount_set(&ct_ft->ref, 1); ct_ft->zone = params->zone; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); @@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) goto err_init; __module_get(THIS_MODULE); -take_ref: +out_unlock: params->ct_ft = ct_ft; - ct_ft->ref++; - spin_unlock_bh(&zones_lock); + mutex_unlock(&zones_mutex); return 0; @@ -96,7 +96,7 @@ err_init: err_insert: kfree(ct_ft); err_alloc: - spin_unlock_bh(&zones_lock); + mutex_unlock(&zones_mutex); return err; } @@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params) { struct tcf_ct_flow_table *ct_ft = params->ct_ft; - spin_lock_bh(&zones_lock); - if (--params->ct_ft->ref == 0) { + if (refcount_dec_and_test(¶ms->ct_ft->ref)) { rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); queue_rcu_work(act_ct_wq, &ct_ft->rwork); } - spin_unlock_bh(&zones_lock); } static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, -- cgit v1.2.3 From 7b70973d7edb2f005511102d5a2e0116464a46a1 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:32 +0000 Subject: bpf: sockmap: Only check ULP for 
TCP sockets The sock map code checks that a socket does not have an active upper layer protocol before inserting it into the map. This requires casting via inet_csk, which isn't valid for UDP sockets. Guard checks for ULP by checking inet_sk(sk)->is_icsk first. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-2-lmb@cloudflare.com --- include/linux/skmsg.h | 8 +++++++- include/net/inet_connection_sock.h | 6 ++++++ net/core/sock_map.c | 6 ++---- net/ipv4/tcp_ulp.c | 7 ------- 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 112765bd146d..4d3d75d63066 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -360,7 +360,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + if (inet_csk_has_ulp(sk)) { + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } } static inline void sk_psock_set_state(struct sk_psock *psock, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 895546058a20..a3f076befa4f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -335,4 +335,10 @@ static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) if (icsk->icsk_ack.pingpong < U8_MAX) icsk->icsk_ack.pingpong++; } + +static inline bool inet_csk_has_ulp(struct sock *sk) +{ + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2e0f465295c3..cb8f740f7949 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -384,7 +384,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); - struct inet_connection_sock *icsk = inet_csk(sk); struct sk_psock_link *link; struct sk_psock *psock; struct sock *osk; @@ -395,7 +394,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; - if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data))) + if (inet_csk_has_ulp(sk)) return -EINVAL; link = sk_psock_init_link(); @@ -738,7 +737,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct inet_connection_sock *icsk = inet_csk(sk); u32 key_size = map->key_size, hash; struct bpf_htab_elem *elem, *elem_new; struct bpf_htab_bucket *bucket; @@ -749,7 +747,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; - if (unlikely(icsk->icsk_ulp_data)) + if (inet_csk_has_ulp(sk)) return -EINVAL; link = sk_psock_init_link(); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 2703f24c5d1a..7c27aa629af1 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -105,13 +105,6 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, { struct inet_connection_sock *icsk = inet_csk(sk); - if (!icsk->icsk_ulp_ops) { - sk->sk_write_space 
= write_space; - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, proto); - return; - } - if (icsk->icsk_ulp_ops->update) icsk->icsk_ulp_ops->update(sk, proto, write_space); } -- cgit v1.2.3 From 1a2e20132db7bb76dd4f97b8364bd167227dd15f Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:33 +0000 Subject: skmsg: Update saved hooks only once Only update psock->saved_* if psock->sk_proto has not been initialized yet. This allows us to get rid of tcp_bpf_reinit_sk_prot. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-3-lmb@cloudflare.com --- include/linux/skmsg.h | 20 ++++++++++++++++---- net/ipv4/tcp_bpf.c | 16 +--------------- 2 files changed, 17 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4d3d75d63066..2be51b7a5800 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -347,11 +347,23 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - psock->saved_unhash = sk->sk_prot->unhash; - psock->saved_close = sk->sk_prot->close; - psock->saved_write_space = sk->sk_write_space; + /* Initialize saved callbacks and original proto only once, since this + * function may be called multiple times for a psock, e.g. when + * psock->progs.msg_parser is updated. + * + * Since we've not installed the new proto, psock is not yet in use and + * we can initialize it without synchronization. + */ + if (!psock->sk_proto) { + struct proto *orig = READ_ONCE(sk->sk_prot); + + psock->saved_unhash = orig->unhash; + psock->saved_close = orig->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = orig; + } - psock->sk_proto = sk->sk_prot; /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7d6e1b75d4d4..3327afa05c3d 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -637,20 +637,6 @@ static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock) sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]); } -static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed - * or added requiring sk_prot hook updates. We keep original saved - * hooks in this case. - * - * Pairs with lockless read in sk_clone_lock(). - */ - WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); -} - static int tcp_bpf_assert_proto_ops(struct proto *ops) { /* In order to avoid retpoline, we make assumptions when we call @@ -670,7 +656,7 @@ void tcp_bpf_reinit(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - tcp_bpf_reinit_sk_prot(sk, psock); + tcp_bpf_update_sk_prot(sk, psock); rcu_read_unlock(); } -- cgit v1.2.3 From d19da360ee0f3e6c1375391db1a724b66fd43312 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:34 +0000 Subject: bpf: tcp: Move assertions into tcp_bpf_get_proto We need to ensure that sk->sk_prot uses certain callbacks, so that code that directly calls e.g. tcp_sendmsg in certain corner cases works. To avoid spurious asserts, we must to do this only if sk_psock_update_proto has not yet been called. 
The same invariants apply for tcp_bpf_check_v6_needs_rebuild, so move the call as well. Doing so allows us to merge tcp_bpf_init and tcp_bpf_reinit. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-4-lmb@cloudflare.com --- include/net/tcp.h | 1 - net/core/sock_map.c | 25 +++++++++---------------- net/ipv4/tcp_bpf.c | 42 ++++++++++++++++++++++-------------------- 3 files changed, 31 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 07f947cc80e6..ccf39d80b695 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2196,7 +2196,6 @@ struct sk_msg; struct sk_psock; int tcp_bpf_init(struct sock *sk); -void tcp_bpf_reinit(struct sock *sk); int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int flags); int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, diff --git a/net/core/sock_map.c b/net/core/sock_map.c index cb8f740f7949..fafcbd22ecba 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -145,8 +145,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, struct sock *sk) { struct bpf_prog *msg_parser, *skb_parser, *skb_verdict; - bool skb_progs, sk_psock_is_new = false; struct sk_psock *psock; + bool skb_progs; int ret; skb_verdict = READ_ONCE(progs->skb_verdict); @@ -191,18 +191,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, ret = -ENOMEM; goto out_progs; } - sk_psock_is_new = true; } if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); - if (sk_psock_is_new) { - ret = tcp_bpf_init(sk); - if (ret < 0) - goto out_drop; - } else { - tcp_bpf_reinit(sk); - } + + ret = tcp_bpf_init(sk); + if (ret < 0) + goto out_drop; write_lock_bh(&sk->sk_callback_lock); if (skb_progs && !psock->parser.enabled) { @@ -239,15 +235,12 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) if (IS_ERR(psock)) return PTR_ERR(psock); - if (psock) { - tcp_bpf_reinit(sk); - return 0; + if (!psock) { + psock = sk_psock_init(sk, map->numa_node); + if (!psock) + return -ENOMEM; } - psock = sk_psock_init(sk, map->numa_node); - if (!psock) - return -ENOMEM; - ret = tcp_bpf_init(sk); if (ret < 0) sk_psock_put(sk, psock); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 3327afa05c3d..ed8a8f3c9afe 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -629,14 +629,6 @@ static int __init tcp_bpf_v4_build_proto(void) } core_initcall(tcp_bpf_v4_build_proto); -static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]); -} - static int tcp_bpf_assert_proto_ops(struct proto *ops) { /* In order to avoid retpoline, we make assumptions when we call @@ -648,34 +640,44 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -void tcp_bpf_reinit(struct sock *sk) +static struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) { - struct sk_psock *psock; + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + int config = psock->progs.msg_parser ? 
TCP_BPF_TX : TCP_BPF_BASE; - sock_owned_by_me(sk); + if (!psock->sk_proto) { + struct proto *ops = READ_ONCE(sk->sk_prot); - rcu_read_lock(); - psock = sk_psock(sk); - tcp_bpf_update_sk_prot(sk, psock); - rcu_read_unlock(); + if (tcp_bpf_assert_proto_ops(ops)) + return ERR_PTR(-EINVAL); + + tcp_bpf_check_v6_needs_rebuild(sk, ops); + } + + return &tcp_bpf_prots[family][config]; } int tcp_bpf_init(struct sock *sk) { - struct proto *ops = READ_ONCE(sk->sk_prot); struct sk_psock *psock; + struct proto *prot; sock_owned_by_me(sk); rcu_read_lock(); psock = sk_psock(sk); - if (unlikely(!psock || psock->sk_proto || - tcp_bpf_assert_proto_ops(ops))) { + if (unlikely(!psock)) { rcu_read_unlock(); return -EINVAL; } - tcp_bpf_check_v6_needs_rebuild(sk, ops); - tcp_bpf_update_sk_prot(sk, psock); + + prot = tcp_bpf_get_proto(sk, psock); + if (IS_ERR(prot)) { + rcu_read_unlock(); + return PTR_ERR(prot); + } + + sk_psock_update_proto(sk, psock, prot); rcu_read_unlock(); return 0; } -- cgit v1.2.3 From f747632b608f90217a4e9ebb1deba8a37612aa32 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:36 +0000 Subject: bpf: sockmap: Move generic sockmap hooks from BPF TCP The init, close and unhash handlers from TCP sockmap are generic, and can be reused by UDP sockmap. Move the helpers into the sockmap code base and expose them. This requires tcp_bpf_get_proto and tcp_bpf_clone to be conditional on BPF_STREAM_PARSER. The moved functions are unmodified, except that sk_psock_unlink is renamed to sock_map_unlink to better match its behaviour. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-6-lmb@cloudflare.com --- include/linux/bpf.h | 4 +- include/linux/skmsg.h | 28 ------------- include/net/tcp.h | 15 ++++--- net/core/sock_map.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++--- net/ipv4/tcp_bpf.c | 84 +++------------------------------------ 5 files changed, 118 insertions(+), 119 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 40c53924571d..94a329b9da81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1419,6 +1419,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_BPF_STREAM_PARSER) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which) @@ -1431,7 +1433,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } -#endif +#endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2be51b7a5800..8a709f63c5e5 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -#if defined(CONFIG_BPF_STREAM_PARSER) -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); -#else -static inline void sk_psock_unlink(struct sock *sk, - struct sk_psock_link *link) -{ -} -#endif void __sk_psock_purge_ingress_msg(struct sk_psock *psock); @@ -399,26 +391,6 @@ static 
inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } -static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) -{ - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { - psock = ERR_PTR(-EBUSY); - goto out; - } - - if (!refcount_inc_not_zero(&psock->refcnt)) - psock = ERR_PTR(-EBUSY); - } -out: - rcu_read_unlock(); - return psock; -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/include/net/tcp.h b/include/net/tcp.h index ad3abeaa703e..43fa07a36fa6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2195,19 +2195,22 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, struct sk_msg; struct sk_psock; +#ifdef CONFIG_BPF_STREAM_PARSER +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#else +static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) +{ +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + #ifdef CONFIG_NET_SOCK_MSG -int tcp_bpf_init(struct sock *sk); int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int flags); int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); -void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); -#else -static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) -{ -} #endif /* CONFIG_NET_SOCK_MSG */ /* Call BPF_SOCK_OPS program that returns an int. If the return value diff --git a/net/core/sock_map.c b/net/core/sock_map.c index fafcbd22ecba..cb240d87e068 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -141,6 +141,51 @@ static void sock_map_unref(struct sock *sk, void *link_raw) } } +static int sock_map_init_proto(struct sock *sk) +{ + struct sk_psock *psock; + struct proto *prot; + + sock_owned_by_me(sk); + + rcu_read_lock(); + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + return -EINVAL; + } + + prot = tcp_bpf_get_proto(sk, psock); + if (IS_ERR(prot)) { + rcu_read_unlock(); + return PTR_ERR(prot); + } + + sk_psock_update_proto(sk, psock, prot); + rcu_read_unlock(); + return 0; +} + +static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock) { + if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { + psock = ERR_PTR(-EBUSY); + goto out; + } + + if (!refcount_inc_not_zero(&psock->refcnt)) + psock = ERR_PTR(-EBUSY); + } +out: + rcu_read_unlock(); + return psock; +} + static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, struct sock *sk) { @@ -172,7 +217,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, } } - psock = sk_psock_get_checked(sk); + psock = sock_map_psock_get_checked(sk); if (IS_ERR(psock)) { ret = PTR_ERR(psock); goto out_progs; @@ -196,7 +241,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); - ret = tcp_bpf_init(sk); + ret = sock_map_init_proto(sk); if (ret < 0) goto out_drop; @@ -231,7 +276,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) struct sk_psock *psock; int ret; - psock = sk_psock_get_checked(sk); + psock = 
sock_map_psock_get_checked(sk); if (IS_ERR(psock)) return PTR_ERR(psock); @@ -241,7 +286,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) return -ENOMEM; } - ret = tcp_bpf_init(sk); + ret = sock_map_init_proto(sk); if (ret < 0) sk_psock_put(sk, psock); return ret; @@ -1120,7 +1165,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, return 0; } -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link) +static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link) { switch (link->map->map_type) { case BPF_MAP_TYPE_SOCKMAP: @@ -1133,3 +1178,54 @@ void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link) break; } } + +static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock) +{ + struct sk_psock_link *link; + + while ((link = sk_psock_link_pop(psock))) { + sock_map_unlink(sk, link); + sk_psock_free_link(link); + } +} + +void sock_map_unhash(struct sock *sk) +{ + void (*saved_unhash)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + return; + } + + saved_unhash = psock->saved_unhash; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + saved_unhash(sk); +} + +void sock_map_close(struct sock *sk, long timeout) +{ + void (*saved_close)(struct sock *sk, long timeout); + struct sk_psock *psock; + + lock_sock(sk); + rcu_read_lock(); + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); + return sk->sk_prot->close(sk, timeout); + } + + saved_close = psock->saved_close; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + release_sock(sk); + saved_close(sk, timeout); +} diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index ed8a8f3c9afe..fe7b4fbc31c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -528,57 +528,7 @@ out_err: return copied ? 
copied : err; } -static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_link *link; - - while ((link = sk_psock_link_pop(psock))) { - sk_psock_unlink(sk, link); - sk_psock_free_link(link); - } -} - -static void tcp_bpf_unhash(struct sock *sk) -{ - void (*saved_unhash)(struct sock *sk); - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - return; - } - - saved_unhash = psock->saved_unhash; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - saved_unhash(sk); -} - -static void tcp_bpf_close(struct sock *sk, long timeout) -{ - void (*saved_close)(struct sock *sk, long timeout); - struct sk_psock *psock; - - lock_sock(sk); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - saved_close = psock->saved_close; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - release_sock(sk); - saved_close(sk, timeout); -} - +#ifdef CONFIG_BPF_STREAM_PARSER enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash; - prot[TCP_BPF_BASE].close = tcp_bpf_close; + prot[TCP_BPF_BASE].unhash = sock_map_unhash; + prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; @@ -640,7 +590,7 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -static struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) { int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; @@ -657,31 +607,6 @@ static struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) return &tcp_bpf_prots[family][config]; } -int tcp_bpf_init(struct sock *sk) -{ - struct sk_psock *psock; - struct proto *prot; - - sock_owned_by_me(sk); - - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - return -EINVAL; - } - - prot = tcp_bpf_get_proto(sk, psock); - if (IS_ERR(prot)) { - rcu_read_unlock(); - return PTR_ERR(prot); - } - - sk_psock_update_proto(sk, psock, prot); - rcu_read_unlock(); - return 0; -} - /* If a child got cloned from a listening socket that had tcp_bpf * protocol callbacks installed, we need to restore the callbacks to * the default ones because the child does not inherit the psock state @@ -695,3 +620,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) newsk->sk_prot = sk->sk_prot_creator; } +#endif /* CONFIG_BPF_STREAM_PARSER */ -- cgit v1.2.3 From cb21802b39632ba1fa9b31ea134d2079a47600ef Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:37 +0000 Subject: bpf: sockmap: Simplify sock_map_init_proto We can take advantage of the fact that both callers of sock_map_init_proto are holding a RCU read lock, and have verified that psock is valid. 
Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-7-lmb@cloudflare.com --- net/core/sock_map.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index cb240d87e068..edfdce17b951 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -141,28 +141,17 @@ static void sock_map_unref(struct sock *sk, void *link_raw) } } -static int sock_map_init_proto(struct sock *sk) +static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) { - struct sk_psock *psock; struct proto *prot; sock_owned_by_me(sk); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - return -EINVAL; - } - prot = tcp_bpf_get_proto(sk, psock); - if (IS_ERR(prot)) { - rcu_read_unlock(); + if (IS_ERR(prot)) return PTR_ERR(prot); - } sk_psock_update_proto(sk, psock, prot); - rcu_read_unlock(); return 0; } @@ -241,7 +230,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); - ret = sock_map_init_proto(sk); + ret = sock_map_init_proto(sk, psock); if (ret < 0) goto out_drop; @@ -286,7 +275,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) return -ENOMEM; } - ret = sock_map_init_proto(sk); + ret = sock_map_init_proto(sk, psock); if (ret < 0) sk_psock_put(sk, psock); return ret; -- cgit v1.2.3 From edc6741cc66059532ba621928e3f1b02a53a2f39 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:38 +0000 Subject: bpf: Add sockmap hooks for UDP sockets Add basic psock hooks for UDP sockets. This allows adding and removing sockets, as well as automatic removal on unhash and close. 
Signed-off-by: Lorenz Bauer Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-8-lmb@cloudflare.com --- MAINTAINERS | 1 + include/net/udp.h | 5 +++++ net/ipv4/Makefile | 1 + net/ipv4/udp_bpf.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 net/ipv4/udp_bpf.c (limited to 'net') diff --git a/MAINTAINERS b/MAINTAINERS index c23884e084be..14554bde1c06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9370,6 +9370,7 @@ F: include/linux/skmsg.h F: net/core/skmsg.c F: net/core/sock_map.c F: net/ipv4/tcp_bpf.c +F: net/ipv4/udp_bpf.c LANTIQ / INTEL Ethernet drivers M: Hauke Mehrtens diff --git a/include/net/udp.h b/include/net/udp.h index e55d5f765807..a8fa6c0c6ded 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -503,4 +503,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; } +#ifdef CONFIG_BPF_STREAM_PARSER +struct sk_psock; +struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); +#endif /* BPF_STREAM_PARSER */ + #endif /* _UDP_H */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9d97bace13c8..9e1a186a3671 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o +obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c new file mode 100644 index 000000000000..eddd973e6575 --- /dev/null +++ b/net/ipv4/udp_bpf.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */ + +#include +#include +#include + +enum { + UDP_BPF_IPV4, + UDP_BPF_IPV6, + UDP_BPF_NUM_PROTS, +}; + +static struct proto *udpv6_prot_saved __read_mostly; +static DEFINE_SPINLOCK(udpv6_prot_lock); +static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS]; + +static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->unhash = sock_map_unhash; + prot->close = sock_map_close; +} + +static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops) +{ + if (sk->sk_family == AF_INET6 && + unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) { + spin_lock_bh(&udpv6_prot_lock); + if (likely(ops != udpv6_prot_saved)) { + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops); + smp_store_release(&udpv6_prot_saved, ops); + } + spin_unlock_bh(&udpv6_prot_lock); + } +} + +static int __init udp_bpf_v4_build_proto(void) +{ + udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); + return 0; +} +core_initcall(udp_bpf_v4_build_proto); + +struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) +{ + int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; + + if (!psock->sk_proto) + udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot)); + + return &udp_bpf_prots[family]; +} -- cgit v1.2.3 From 7b98cd42b0492237887b62f5ba05931169bcfcf6 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:39 +0000 Subject: bpf: sockmap: Add UDP support Allow adding hashed UDP sockets to sockmaps. 
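As a rough user-space sketch of how this is used (not part of the patch set; it assumes libbpf and an existing BPF_MAP_TYPE_SOCKMAP whose value size is sizeof(int)), a bound UDP socket can now be inserted the same way an established TCP socket is:

#include <arpa/inet.h>
#include <bpf/bpf.h>
#include <netinet/in.h>
#include <stdint.h>
#include <sys/socket.h>
#include <unistd.h>

/* Illustrative only: map_fd is assumed to refer to an already-created
 * BPF_MAP_TYPE_SOCKMAP (or SOCKHASH) map with value_size == sizeof(int).
 */
static int add_udp_sock(int map_fd, uint32_t key)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	/* The socket must be hashed, i.e. bound, before insertion;
	 * unbound UDP sockets fail the sk_hashed() check in
	 * sock_map_sk_state_allowed() in the diff below.
	 */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) ||
	    bpf_map_update_elem(map_fd, &key, &fd, BPF_ANY)) {
		close(fd);
		return -1;
	}

	return fd;
}
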
Signed-off-by: Lorenz Bauer Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-9-lmb@cloudflare.com --- net/core/sock_map.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index edfdce17b951..a7075b3b4489 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -11,6 +11,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -147,7 +148,19 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) sock_owned_by_me(sk); - prot = tcp_bpf_get_proto(sk, psock); + switch (sk->sk_type) { + case SOCK_STREAM: + prot = tcp_bpf_get_proto(sk, psock); + break; + + case SOCK_DGRAM: + prot = udp_bpf_get_proto(sk, psock); + break; + + default: + return -EINVAL; + } + if (IS_ERR(prot)) return PTR_ERR(prot); @@ -162,7 +175,7 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { + if (sk->sk_prot->close != sock_map_close) { psock = ERR_PTR(-EBUSY); goto out; } @@ -474,15 +487,31 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops) ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB; } -static bool sock_map_sk_is_suitable(const struct sock *sk) +static bool sk_is_tcp(const struct sock *sk) { return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } +static bool sk_is_udp(const struct sock *sk) +{ + return sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; +} + +static bool sock_map_sk_is_suitable(const struct sock *sk) +{ + return sk_is_tcp(sk) || sk_is_udp(sk); +} + static bool sock_map_sk_state_allowed(const struct sock *sk) { - return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_tcp(sk)) + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + else if (sk_is_udp(sk)) + return sk_hashed(sk); + + return false; } static int sock_map_update_elem(struct bpf_map *map, void *key, -- cgit v1.2.3 From e08ab0b377a1489760533424437c5f4be7f484a4 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Mon, 9 Mar 2020 13:16:40 -0700 Subject: tcp: add bytes not sent to SCM_TIMESTAMPING_OPT_STATS Add TCP_NLA_BYTES_NOTSENT to SCM_TIMESTAMPING_OPT_STATS that reports bytes in the write queue but not sent. This is the same metric as what is exported with tcp_info.tcpi_notsent_bytes. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 1a7fc856e237..f2acb2566333 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -312,6 +312,7 @@ enum { TCP_NLA_REORD_SEEN, /* reordering events seen */ TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ + TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48aa457a9516..b7134f76f840 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3344,6 +3344,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ 0; } @@ -3399,6 +3400,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); + nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, + max_t(int, 0, tp->write_seq - tp->snd_nxt)); return stats; } -- cgit v1.2.3 From 3f95f55eb55daa17c047d731d1fb7854e5823478 Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Tue, 10 Mar 2020 00:40:33 +0530 Subject: net: sched: pie: change tc_pie_xstats->prob Commit 105e808c1da2 ("pie: remove pie_vars->accu_prob_overflows") changes the scale of probability values in PIE from (2^64 - 1) to (2^56 - 1). This affects the precision of tc_pie_xstats->prob in user space. This patch ensures user space is unaffected. Suggested-by: Eric Dumazet Signed-off-by: Leslie Monis Signed-off-by: David S. Miller --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index f52442d39bf5..c65077f0c0f3 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -493,7 +493,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { - .prob = q->vars.prob, + .prob = q->vars.prob << BITS_PER_BYTE, .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / NSEC_PER_USEC, .packets_in = q->stats.packets_in, -- cgit v1.2.3 From ec33916d47cbac546e0437b59e6ed779d3c31ac3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 6 Mar 2020 21:29:46 +0100 Subject: mptcp: don't grow mptcp socket receive buffer when rcvbuf is locked The mptcp rcvbuf size is adjusted according to the subflow rcvbuf size. This should not be done if userspace did set a fixed value. Fixes: 600911ff5f72bae ("mptcp: add rmem queue accounting") Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4c075a9f7ed0..95007e433109 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -141,11 +141,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, bool more_data_avail; struct tcp_sock *tp; bool done = false; - int rcvbuf; - rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); - if (rcvbuf > sk->sk_rcvbuf) - sk->sk_rcvbuf = rcvbuf; + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); + + if (rcvbuf > sk->sk_rcvbuf) + sk->sk_rcvbuf = rcvbuf; + } tp = tcp_sk(ssk); do { -- cgit v1.2.3 From 1e09e5818b3a07ee824addf0d048df3797de687e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Mar 2020 11:49:46 +0100 Subject: pktgen: Allow on loopback device When pktgen is used to measure the performance of dev_queue_xmit() packet handling in the core, it is preferable to not hand down packets to a low-level Ethernet driver as it would distort the measurements. Allow using pktgen on the loopback device, thus constraining measurements to core code. Signed-off-by: Lukas Wunner Signed-off-by: David S. Miller --- net/core/pktgen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index acc849df60b5..f2b3d8dd40f4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2003,8 +2003,8 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, return -ENODEV; } - if (odev->type != ARPHRD_ETHER) { - pr_err("not an ethernet device: \"%s\"\n", ifname); + if (odev->type != ARPHRD_ETHER && odev->type != ARPHRD_LOOPBACK) { + pr_err("not an ethernet or loopback device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { pr_err("device is down: \"%s\"\n", ifname); -- cgit v1.2.3 From 00b383b8abd1207b2c86e09834ad1617f1dd0388 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 9 Mar 2020 22:48:10 +0100 Subject: Bluetooth: Use bt_dev_err for RPA generation failure message When the RPA generation fails, indicate the error with a device specifc error message. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 2a1b64dbf76e..53179ae856ae 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1499,7 +1499,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); + bt_dev_err(hdev, "failed to generate new RPA"); return err; } -- cgit v1.2.3 From 8a5956197d7eb7a0cbb5b4271111d1bf6e17f25c Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Wed, 11 Mar 2020 14:18:57 +0000 Subject: Bluetooth: fix off by one in err_data_reporting cmd masks. This change fixes the off by one error in the erroneous command bit masks which can lead to the erroneous data commands being sent to a controller that doesn't support them. 
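For reference, the HCI supported-commands bitmap is indexed per octet and bit, so a command advertised at octet 18, bit 2 is tested with commands[18] & (1 << 2), i.e. 0x04; the previous masks 0x02 and 0x04 checked bits 1 and 2 instead of bits 2 and 3, hence the off-by-one. A minimal sketch of such a test, illustrative only and not a helper added by this patch:

/* Bit "bit" of octet "octet" in the supported-commands bitmap maps to
 * the mask (1 << bit): octet 18 bit 2 is 0x04 (Read Default Erroneous
 * Data Reporting) and bit 3 is 0x08 (Write Default Erroneous Data
 * Reporting).
 */
static inline bool hci_cmd_supported(const __u8 *commands,
                                     unsigned int octet, unsigned int bit)
{
        return commands[octet] & (1U << bit);
}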
Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9ce98762559b..196edc039b8e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -603,7 +603,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[8] & 0x01) hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); - if (hdev->commands[18] & 0x02) + if (hdev->commands[18] & 0x04) hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); /* Some older Broadcom based Bluetooth 1.2 controllers do not @@ -844,7 +844,7 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) /* Set erroneous data reporting if supported to the wideband speech * setting value */ - if (hdev->commands[18] & 0x04) { + if (hdev->commands[18] & 0x08) { bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); -- cgit v1.2.3 From 72da7b2ccabd5fd93d6b8d0093936e980602652b Mon Sep 17 00:00:00 2001 From: Joseph Hwang Date: Tue, 10 Mar 2020 09:31:50 -0700 Subject: Bluetooth: mgmt: add mgmt_cmd_status in add_advertising If an error occurs during request building in add_advertising(), remember to send MGMT_STATUS_FAILED command status back to bluetoothd. Signed-off-by: Joseph Hwang Signed-off-by: Manish Mandlik Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4da48618b271..b3a7f387da32 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6807,8 +6807,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (!err) err = hci_req_run(&req, add_advertising_complete); - if (err < 0) + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 9952d90ea2885d7cbf80cd233f694f09a9c0eaec Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 11 Mar 2020 08:54:00 -0700 Subject: Bluetooth: Handle PM_SUSPEND_PREPARE and PM_POST_SUSPEND Register for PM_SUSPEND_PREPARE and PM_POST_SUSPEND to make sure the Bluetooth controller is prepared correctly for suspend/resume. Implement the registration, scheduling and task handling portions only in this patch. 
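As background, and illustrative only rather than code from this patch, the generic PM notifier pattern used by this series is: register a notifier_block and receive PM_SUSPEND_PREPARE before the host suspends and PM_POST_SUSPEND after it resumes; the names example_pm_notifier and example_pm_nb below are hypothetical.

#include <linux/suspend.h>
#include <linux/notifier.h>

static int example_pm_notifier(struct notifier_block *nb,
                               unsigned long action, void *data)
{
        switch (action) {
        case PM_SUSPEND_PREPARE:
                /* quiesce the controller before the host sleeps */
                break;
        case PM_POST_SUSPEND:
                /* restore normal operation after resume */
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block example_pm_nb = {
        .notifier_call = example_pm_notifier,
};

/* register_pm_notifier(&example_pm_nb) at init,
 * unregister_pm_notifier(&example_pm_nb) at teardown.
 */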
Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 23 +++++++++++ net/bluetooth/hci_core.c | 86 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.c | 15 +++++++ net/bluetooth/hci_request.h | 2 + 4 files changed, 126 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c498ac113930..d6f694b436bf 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -88,6 +88,20 @@ struct discovery_state { unsigned long scan_duration; }; +#define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ + +enum suspend_tasks { + SUSPEND_POWERING_DOWN, + + SUSPEND_PREPARE_NOTIFIER, + __SUSPEND_NUM_TASKS +}; + +enum suspended_state { + BT_RUNNING = 0, + BT_SUSPENDED, +}; + struct hci_conn_hash { struct list_head list; unsigned int acl_num; @@ -390,6 +404,15 @@ struct hci_dev { void *smp_bredr_data; struct discovery_state discovery; + + struct notifier_block suspend_notifier; + struct work_struct suspend_prepare; + enum suspended_state suspend_state_next; + enum suspended_state suspend_state; + + wait_queue_head_t suspend_wait_q; + DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); + struct hci_conn_hash conn_hash; struct list_head mgmt_pending; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 196edc039b8e..39aa21a1fe92 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -1787,6 +1789,9 @@ int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + if (test_and_clear_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks)) + wake_up(&hdev->suspend_wait_q); + /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); @@ -3264,6 +3269,78 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, } } +static int hci_suspend_wait_event(struct hci_dev *hdev) +{ +#define WAKE_COND \ + (find_first_bit(hdev->suspend_tasks, __SUSPEND_NUM_TASKS) == \ + __SUSPEND_NUM_TASKS) + + int i; + int ret = wait_event_timeout(hdev->suspend_wait_q, + WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT); + + if (ret == 0) { + bt_dev_dbg(hdev, "Timed out waiting for suspend"); + for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) { + if (test_bit(i, hdev->suspend_tasks)) + bt_dev_dbg(hdev, "Bit %d is set", i); + clear_bit(i, hdev->suspend_tasks); + } + + ret = -ETIMEDOUT; + } else { + ret = 0; + } + + return ret; +} + +static void hci_prepare_suspend(struct work_struct *work) +{ + struct hci_dev *hdev = + container_of(work, struct hci_dev, suspend_prepare); + + hci_dev_lock(hdev); + hci_req_prepare_suspend(hdev, hdev->suspend_state_next); + hci_dev_unlock(hdev); +} + +static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct hci_dev *hdev = + container_of(nb, struct hci_dev, suspend_notifier); + int ret = 0; + + /* If powering down, wait for completion. */ + if (mgmt_powering_down(hdev)) { + set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); + ret = hci_suspend_wait_event(hdev); + if (ret) + goto done; + } + + /* Suspend notifier should only act on events when powered. 
*/ + if (!hdev_is_powered(hdev)) + goto done; + + if (action == PM_SUSPEND_PREPARE) { + hdev->suspend_state_next = BT_SUSPENDED; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + + ret = hci_suspend_wait_event(hdev); + } else if (action == PM_POST_SUSPEND) { + hdev->suspend_state_next = BT_RUNNING; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + + ret = hci_suspend_wait_event(hdev); + } + +done: + return ret ? notifier_from_errno(-EBUSY) : NOTIFY_STOP; +} /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -3341,6 +3418,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); + INIT_WORK(&hdev->suspend_prepare, hci_prepare_suspend); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); @@ -3349,6 +3427,7 @@ struct hci_dev *hci_alloc_dev(void) skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); + init_waitqueue_head(&hdev->suspend_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); @@ -3460,6 +3539,11 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) + goto err_wqueue; + queue_work(hdev->req_workqueue, &hdev->power_on); return id; @@ -3493,6 +3577,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); + unregister_pm_notifier(&hdev->suspend_notifier); + if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 53179ae856ae..2343166614f0 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -918,6 +918,21 @@ static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) return adv_instance->scan_rsp_len; } +/* Call with hci_dev_lock */ +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) +{ + if (next == hdev->suspend_state) { + bt_dev_dbg(hdev, "Same state before and after: %d", next); + goto done; + } + + hdev->suspend_state = next; + +done: + clear_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); +} + static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) { u8 instance = hdev->cur_adv_instance; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index a7019fbeadd3..0e81614d235e 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -68,6 +68,8 @@ void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); + void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); -- cgit v1.2.3 From 4f40afc6c76451daff7d0dcfc8a3d113ccf65bfc Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 11 Mar 2020 08:54:01 -0700 Subject: Bluetooth: Handle BR/EDR devices during suspend To handle BR/EDR devices, we first disable page scan and disconnect all connected devices. 
Once that is complete, we add event filters (for devices that can wake the system) and re-enable page scan. Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 17 ++++--- include/net/bluetooth/hci_core.h | 10 +++- net/bluetooth/hci_core.c | 22 ++++++-- net/bluetooth/hci_event.c | 24 +++++++++ net/bluetooth/hci_request.c | 106 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 169 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 4e86f1bb7a87..5f60e135aeb6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -942,10 +942,14 @@ struct hci_cp_sniff_subrate { #define HCI_OP_RESET 0x0c03 #define HCI_OP_SET_EVENT_FLT 0x0c05 -struct hci_cp_set_event_flt { - __u8 flt_type; - __u8 cond_type; - __u8 condition[]; +#define HCI_SET_EVENT_FLT_SIZE 9 +struct hci_cp_set_event_filter { + __u8 flt_type; + __u8 cond_type; + struct { + bdaddr_t bdaddr; + __u8 auto_accept; + } __packed addr_conn_flt; } __packed; /* Filter types */ @@ -959,8 +963,9 @@ struct hci_cp_set_event_flt { #define HCI_CONN_SETUP_ALLOW_BDADDR 0x02 /* CONN_SETUP Conditions */ -#define HCI_CONN_SETUP_AUTO_OFF 0x01 -#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_OFF 0x01 +#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_ON_WITH_RS 0x03 #define HCI_OP_READ_STORED_LINK_KEY 0x0c0d struct hci_cp_read_stored_link_key { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d6f694b436bf..1a4d732bdce6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -91,6 +91,10 @@ struct discovery_state { #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ enum suspend_tasks { + SUSPEND_SCAN_DISABLE, + SUSPEND_SCAN_ENABLE, + SUSPEND_DISCONNECTING, + SUSPEND_POWERING_DOWN, SUSPEND_PREPARE_NOTIFIER, @@ -99,7 +103,8 @@ enum suspend_tasks { enum suspended_state { BT_RUNNING = 0, - BT_SUSPENDED, + BT_SUSPEND_DISCONNECT, + BT_SUSPEND_COMPLETE, }; struct hci_conn_hash { @@ -409,6 +414,8 @@ struct hci_dev { struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; + bool scanning_paused; + bool suspended; wait_queue_head_t suspend_wait_q; DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); @@ -418,6 +425,7 @@ struct hci_dev { struct list_head mgmt_pending; struct list_head blacklist; struct list_head whitelist; + struct list_head wakeable; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 39aa21a1fe92..dbd2ad3a26ed 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3325,16 +3325,31 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, goto done; if (action == PM_SUSPEND_PREPARE) { - hdev->suspend_state_next = BT_SUSPENDED; + /* Suspend consists of two actions: + * - First, disconnect everything and make the controller not + * connectable (disabling scanning) + * - Second, program event filter/whitelist and enable scan + */ + hdev->suspend_state_next = BT_SUSPEND_DISCONNECT; set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); queue_work(hdev->req_workqueue, &hdev->suspend_prepare); - ret = hci_suspend_wait_event(hdev); + + /* If the disconnect portion failed, don't attempt to complete + * by configuring the whitelist. 
The suspend notifier will + * follow a cancelled suspend with a PM_POST_SUSPEND + * notification. + */ + if (!ret) { + hdev->suspend_state_next = BT_SUSPEND_COMPLETE; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + } } else if (action == PM_POST_SUSPEND) { hdev->suspend_state_next = BT_RUNNING; set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); queue_work(hdev->req_workqueue, &hdev->suspend_prepare); - ret = hci_suspend_wait_event(hdev); } @@ -3399,6 +3414,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->blacklist); INIT_LIST_HEAD(&hdev->whitelist); + INIT_LIST_HEAD(&hdev->wakeable); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b9186026508e..0908eaa7cacf 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2505,6 +2505,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (void *) skb->data; + struct inquiry_entry *ie; struct hci_conn *conn; BT_DBG("%s", hdev->name); @@ -2513,6 +2514,21 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* Connection may not exist if auto-connected. Check the inquiry + * cache to see if we've already discovered this bdaddr before. + * If found and link is an ACL type, create a connection class + * automatically. + */ + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie && ev->link_type == ACL_LINK) { + conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, + HCI_ROLE_SLAVE); + if (!conn) { + bt_dev_err(hdev, "no memory for new conn"); + goto unlock; + } + } + if (ev->link_type != SCO_LINK) goto unlock; @@ -2774,6 +2790,14 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_disconn_cfm(conn, ev->reason); hci_conn_del(conn); + /* The suspend notifier is waiting for all devices to disconnect so + * clear the bit from pending tasks and inform the wait queue. + */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Re-enable advertising if necessary, since it might * have been disabled by the connection. From the * HCI_LE_Set_Advertise_Enable command description in diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 2343166614f0..051e1b16c988 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -918,15 +918,118 @@ static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) return adv_instance->scan_rsp_len; } +static void hci_req_clear_event_filter(struct hci_request *req) +{ + struct hci_cp_set_event_filter f; + + memset(&f, 0, sizeof(f)); + f.flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + + /* Update page scan state (since we may have modified it when setting + * the event filter). 
+ */ + __hci_req_update_scan(req); +} + +static void hci_req_set_event_filter(struct hci_request *req) +{ + struct bdaddr_list *b; + struct hci_cp_set_event_filter f; + struct hci_dev *hdev = req->hdev; + u8 scan; + + /* Always clear event filter when starting */ + hci_req_clear_event_filter(req); + + list_for_each_entry(b, &hdev->wakeable, list) { + memset(&f, 0, sizeof(f)); + bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr); + f.flt_type = HCI_FLT_CONN_SETUP; + f.cond_type = HCI_CONN_SETUP_ALLOW_BDADDR; + f.addr_conn_flt.auto_accept = HCI_CONN_SETUP_AUTO_ON; + + bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f); + } + + scan = !list_empty(&hdev->wakeable) ? SCAN_PAGE : SCAN_DISABLED; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, + status); + if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } +} + /* Call with hci_dev_lock */ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) { + struct hci_conn *conn; + struct hci_request req; + u8 page_scan; + int disconnect_counter; + if (next == hdev->suspend_state) { bt_dev_dbg(hdev, "Same state before and after: %d", next); goto done; } hdev->suspend_state = next; + hci_req_init(&req, hdev); + + if (next == BT_SUSPEND_DISCONNECT) { + /* Mark device as suspended */ + hdev->suspended = true; + + /* Disable page scan */ + page_scan = SCAN_DISABLED; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); + + /* Mark task needing completion */ + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + + /* Prevent disconnects from causing scanning to be re-enabled */ + hdev->scanning_paused = true; + + /* Run commands before disconnecting */ + hci_req_run(&req, suspend_req_complete); + + disconnect_counter = 0; + /* Soft disconnect everything (power off) */ + list_for_each_entry(conn, &hdev->conn_hash.list, list) { + hci_disconnect(conn, HCI_ERROR_REMOTE_POWER_OFF); + disconnect_counter++; + } + + if (disconnect_counter > 0) { + bt_dev_dbg(hdev, + "Had %d disconnects. Will wait on them", + disconnect_counter); + set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks); + } + } else if (next == BT_SUSPEND_COMPLETE) { + /* Unpause to take care of updating scanning params */ + hdev->scanning_paused = false; + /* Enable event filter for paired devices */ + hci_req_set_event_filter(&req); + /* Pause scan changes again. 
*/ + hdev->scanning_paused = true; + hci_req_run(&req, suspend_req_complete); + } else { + hdev->suspended = false; + hdev->scanning_paused = false; + + hci_req_clear_event_filter(&req); + hci_req_run(&req, suspend_req_complete); + } + + hdev->suspend_state = next; done: clear_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); @@ -2030,6 +2133,9 @@ void __hci_req_update_scan(struct hci_request *req) if (mgmt_powering_down(hdev)) return; + if (hdev->scanning_paused) + return; + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_whitelist_entries(hdev)) scan = SCAN_PAGE; -- cgit v1.2.3 From dd522a7429b07e4441871ae75ebbfcf53635bdd4 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 11 Mar 2020 08:54:02 -0700 Subject: Bluetooth: Handle LE devices during suspend To handle LE devices, we must first disable passive scanning and disconnect all connected devices. Once that is complete, we update the whitelist and re-enable scanning Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_request.c | 166 ++++++++++++++++++++++++++------------- 2 files changed, 113 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1a4d732bdce6..2d58485d0335 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -607,6 +607,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; + bool wakeable; }; extern struct list_head hci_dev_list; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 051e1b16c988..11624645cfcf 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -34,6 +34,9 @@ #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 +#define LE_SUSPEND_SCAN_WINDOW 0x0012 +#define LE_SUSPEND_SCAN_INTERVAL 0x0060 + void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -654,6 +657,11 @@ void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_dev *hdev = req->hdev; + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } + if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; @@ -670,15 +678,55 @@ void hci_req_add_le_scan_disable(struct hci_request *req) } } -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) +static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, + u8 bdaddr_type) +{ + struct hci_cp_le_del_from_white_list cp; + + cp.bdaddr_type = bdaddr_type; + bacpy(&cp.bdaddr, bdaddr); + + bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr, + cp.bdaddr_type); + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); +} + +/* Adds connection to white list if needed. On error, returns -1. 
*/ +static int add_to_white_list(struct hci_request *req, + struct hci_conn_params *params, u8 *num_entries, + bool allow_rpa) { struct hci_cp_le_add_to_white_list cp; + struct hci_dev *hdev = req->hdev; + + /* Already in white list */ + if (hci_bdaddr_list_lookup(&hdev->le_white_list, ¶ms->addr, + params->addr_type)) + return 0; + + /* Select filter policy to accept all advertising */ + if (*num_entries >= hdev->le_white_list_size) + return -1; + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { + return -1; + } + + /* During suspend, only wakeable devices can be in whitelist */ + if (hdev->suspended && !params->wakeable) + return 0; + + *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, ¶ms->addr); + bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr, + cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); + + return 0; } static u8 update_white_list(struct hci_request *req) @@ -686,7 +734,14 @@ static u8 update_white_list(struct hci_request *req) struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; struct bdaddr_list *b; - uint8_t white_list_entries = 0; + u8 num_entries = 0; + bool pend_conn, pend_report; + /* We allow whitelisting even with RPAs in suspend. In the worst case, + * we won't be able to wake from devices that use the privacy1.2 + * features. Additionally, once we support privacy1.2 and IRK + * offloading, we can update this to also check for those conditions. + */ + bool allow_rpa = hdev->suspended; /* Go through the current white list programmed into the * controller one by one and check if that address is still @@ -695,29 +750,28 @@ static u8 update_white_list(struct hci_request *req) * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_white_list, list) { - /* If the device is neither in pend_le_conns nor - * pend_le_reports then remove it from the whitelist. + pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, + b->bdaddr_type); + pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, + b->bdaddr_type); + + /* If the device is not likely to connect or report, + * remove it from the whitelist. */ - if (!hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) && - !hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - struct hci_cp_le_del_from_white_list cp; - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); + if (!pend_conn && !pend_report) { + del_from_white_list(req, &b->bdaddr, b->bdaddr_type); continue; } - if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { - /* White list can not be used with RPAs */ + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } - white_list_entries++; + num_entries++; } /* Since all no longer valid white list entries have been @@ -731,47 +785,17 @@ static u8 update_white_list(struct hci_request *req) * white list. 
*/ list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* After adding all new pending connections, walk through * the list of pending reports and also add these to the - * white list if there is still space. + * white list if there is still space. Abort if space runs out. */ list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* Select filter policy to use white list */ @@ -866,6 +890,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; + u8 window, interval; + + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address @@ -896,8 +926,17 @@ void hci_req_add_le_passive_scan(struct hci_request *req) (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; - hci_req_start_scan(req, LE_SCAN_PASSIVE, hdev->le_scan_interval, - hdev->le_scan_window, own_addr_type, filter_policy); + if (hdev->suspended) { + window = LE_SUSPEND_SCAN_WINDOW; + interval = LE_SUSPEND_SCAN_INTERVAL; + } else { + window = hdev->le_scan_window; + interval = hdev->le_scan_interval; + } + + bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy); + hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, + own_addr_type, filter_policy); } static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) @@ -957,6 +996,18 @@ static void hci_req_set_event_filter(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } +static void hci_req_config_le_suspend_scan(struct hci_request *req) +{ + /* Can't change params without disabling first */ + hci_req_add_le_scan_disable(req); + + /* Configure params and enable scanning */ + hci_req_add_le_passive_scan(req); + + /* Block suspend notifier on response */ + set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks); +} + static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) { bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, @@ -991,6 +1042,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) page_scan = SCAN_DISABLED; hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); + /* Disable LE passive scan */ + hci_req_add_le_scan_disable(&req); + /* Mark task needing completion */ set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); @@ -1018,6 +1072,8 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) 
hdev->scanning_paused = false; /* Enable event filter for paired devices */ hci_req_set_event_filter(&req); + /* Enable passive scan at lower duty cycle */ + hci_req_config_le_suspend_scan(&req); /* Pause scan changes again. */ hdev->scanning_paused = true; hci_req_run(&req, suspend_req_complete); @@ -1026,6 +1082,8 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hdev->scanning_paused = false; hci_req_clear_event_filter(&req); + /* Reset passive/background scanning to normal */ + hci_req_config_le_suspend_scan(&req); hci_req_run(&req, suspend_req_complete); } -- cgit v1.2.3 From 4867bd007d25a8dfd4ffc558534f7aec8b361789 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 11 Mar 2020 08:54:03 -0700 Subject: Bluetooth: Pause discovery and advertising during suspend To prevent spurious wake ups, we disable any discovery or advertising when we enter suspend and restore it when we exit suspend. While paused, we disable any management requests to modify discovery or advertising. Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 11 ++++++++++ net/bluetooth/hci_request.c | 44 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 41 +++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2d58485d0335..d4e28773d378 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -91,6 +91,12 @@ struct discovery_state { #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ enum suspend_tasks { + SUSPEND_PAUSE_DISCOVERY, + SUSPEND_UNPAUSE_DISCOVERY, + + SUSPEND_PAUSE_ADVERTISING, + SUSPEND_UNPAUSE_ADVERTISING, + SUSPEND_SCAN_DISABLE, SUSPEND_SCAN_ENABLE, SUSPEND_DISCONNECTING, @@ -410,6 +416,11 @@ struct hci_dev { struct discovery_state discovery; + int discovery_old_state; + bool discovery_paused; + int advertising_old_state; + bool advertising_paused; + struct notifier_block suspend_notifier; struct work_struct suspend_prepare; enum suspended_state suspend_state_next; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 11624645cfcf..bf83179ab9d1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1021,6 +1021,7 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) /* Call with hci_dev_lock */ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) { + int old_state; struct hci_conn *conn; struct hci_request req; u8 page_scan; @@ -1038,6 +1039,28 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Mark device as suspended */ hdev->suspended = true; + /* Pause discovery if not already stopped */ + old_state = hdev->discovery.state; + if (old_state != DISCOVERY_STOPPED) { + set_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hdev->discovery_paused = true; + hdev->discovery_old_state = old_state; + + /* Stop advertising */ + old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); + if (old_state) { + set_bit(SUSPEND_PAUSE_ADVERTISING, hdev->suspend_tasks); + cancel_delayed_work(&hdev->discov_off); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, 0); + } + + hdev->advertising_paused = true; + hdev->advertising_old_state = old_state; /* Disable page scan */ page_scan = 
SCAN_DISABLED; hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); @@ -1084,6 +1107,27 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hci_req_clear_event_filter(&req); /* Reset passive/background scanning to normal */ hci_req_config_le_suspend_scan(&req); + + /* Unpause advertising */ + hdev->advertising_paused = false; + if (hdev->advertising_old_state) { + set_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks); + hci_dev_set_flag(hdev, HCI_ADVERTISING); + queue_work(hdev->req_workqueue, + &hdev->discoverable_update); + hdev->advertising_old_state = 0; + } + + /* Unpause discovery */ + hdev->discovery_paused = false; + if (hdev->discovery_old_state != DISCOVERY_STOPPED && + hdev->discovery_old_state != DISCOVERY_STOPPING) { + set_bit(SUSPEND_UNPAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + hci_req_run(&req, suspend_req_complete); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b3a7f387da32..6552003a170e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1390,6 +1390,12 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } + if (hdev->advertising_paused) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); + goto failed; + } + if (!hdev_is_powered(hdev)) { bool changed = false; @@ -3929,6 +3935,13 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_UNPAUSE_DISCOVERY, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, @@ -3990,6 +4003,13 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, goto failed; } + /* Can't start discovery when it is paused */ + if (hdev->discovery_paused) { + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); + goto failed; + } + /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -4157,6 +4177,12 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, @@ -4388,6 +4414,17 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, if (match.sk) sock_put(match.sk); + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused advertising"); + wake_up(&hdev->suspend_wait_q); + } else if (test_and_clear_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused advertising"); + wake_up(&hdev->suspend_wait_q); + } + /* If "Set Advertising" was just disabled and instance advertising was * set up earlier, then re-enable multi-instance advertising. 
*/ @@ -4439,6 +4476,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); + if (hdev->advertising_paused) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); + hci_dev_lock(hdev); val = !!cp->val; -- cgit v1.2.3 From 4cda75275f9f89f9485b0ca4d6950c95258a9bce Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 10 Mar 2020 17:53:35 +0100 Subject: net: sched: make newly activated qdiscs visible In their .attach callback, mq[prio] only add the qdiscs of the currently active TX queues to the device's qdisc hash list. If a user later increases the number of active TX queues, their qdiscs are not visible via eg. 'tc qdisc show'. Add a hook to netif_set_real_num_tx_queues() that walks all active TX queues and adds those which are missing to the hash list. CC: Eric Dumazet CC: Jamal Hadi Salim CC: Cong Wang CC: Jiri Pirko Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 ++++++ net/core/dev.c | 1 + net/sched/sch_generic.c | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 151208704ed2..7bfc45c5b602 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -153,6 +153,11 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) return !READ_ONCE(qdisc->q.qlen); } +static inline bool qdisc_hashed(struct Qdisc *qdisc) +{ + return hash_hashed(&qdisc->hash); +} + static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { @@ -629,6 +634,7 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); +void dev_qdisc_set_real_num_tx_queues(struct net_device *dev); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 25dab1598803..ccc03abeee52 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2875,6 +2875,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) netif_setup_tc(dev, txq); dev->real_num_tx_queues = txq; + dev_qdisc_set_real_num_tx_queues(dev); if (disabling) { synchronize_net(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c9595f1048a..36a40ebcf0ee 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1268,6 +1268,27 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev) return ret; } +void dev_qdisc_set_real_num_tx_queues(struct net_device *dev) +{ +#ifdef CONFIG_NET_SCHED + struct Qdisc *sch = dev->qdisc; + unsigned int ntx; + + if (!sch) + return; + + ASSERT_RTNL(); + + for (ntx = 0; ntx < dev->real_num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc = dev_queue->qdisc; + + if (qdisc && !qdisc_hashed(qdisc)) + qdisc_hash_add(qdisc, false); + } +#endif +} + static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc) -- cgit v1.2.3 From 0d8a42c93a7af05911e94bc3ec1c0d57d948d583 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Wed, 11 Mar 2020 01:09:02 +0000 Subject: raw: Add missing annotations to raw_seq_start() and raw_seq_stop() Sparse reports warnings at raw_seq_start() and raw_seq_stop() warning: 
context imbalance in raw_seq_start() - wrong count at exit warning: context imbalance in raw_seq_stop() - unexpected unlock The root cause is the missing annotations at raw_seq_start() and raw_seq_stop() Add the missing __acquires(&h->lock) annotation Add the missing __releases(&h->lock) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/ipv4/raw.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3183413ebc6c..47665919048f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1034,6 +1034,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(&h->lock) { struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); @@ -1056,6 +1057,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) + __releases(&h->lock) { struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); -- cgit v1.2.3 From 734c8f75743983842736e6a99ec6be152b2b7f50 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Wed, 11 Mar 2020 01:09:03 +0000 Subject: tcp: Add missing annotation for tcp_child_process() Sparse reports warning at tcp_child_process() warning: context imbalance in tcp_child_process() - unexpected unlock The root cause is the missing annotation at tcp_child_process() Add the missing __releases(&((child)->sk_lock.slock)) annotation Signed-off-by: Jules Irenge Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c8274371c3d0..03af7c3e75ef 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -819,6 +819,7 @@ EXPORT_SYMBOL(tcp_check_req); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) + __releases(&((child)->sk_lock.slock)) { int ret = 0; int state = child->sk_state; -- cgit v1.2.3 From 64fbca011976ce1564f191f4154e4a6ade25779a Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Wed, 11 Mar 2020 01:09:06 +0000 Subject: net: Add missing annotation for *netlink_seq_start() Sparse reports a warning at netlink_seq_start() warning: context imbalance in netlink_seq_start() - wrong count at exit The root cause is the missing annotation at netlink_seq_start() Add the missing __acquires(RCU) annotation Signed-off-by: Jules Irenge Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 813bfab13296..19df49a6ad15 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2583,6 +2583,7 @@ static void *__netlink_seq_next(struct seq_file *seq) } static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) + __acquires(RCU) { struct nl_seq_iter *iter = seq->private; void *obj = SEQ_START_TOKEN; -- cgit v1.2.3 From 767d3ded5fb88e95f137893a8d90d1738b897029 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 11 Mar 2020 19:50:53 +0100 Subject: net: mptcp: don't hang before sending 'MP capable with data' the following packetdrill script socket(..., SOCK_STREAM, IPPROTO_MPTCP) = 3 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) > S 0:0(0) < S. 0:0(0) ack 1 win 65535 > . 
1:1(0) ack 1 win 256 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 fcntl(3, F_SETFL, O_RDWR) = 0 write(3, ..., 1000) = 1000 doesn't transmit 1KB data packet after a successful three-way-handshake, using mp_capable with data as required by protocol v1, and write() hangs forever: PID: 973 TASK: ffff97dd399cae80 CPU: 1 COMMAND: "packetdrill" #0 [ffffa9b94062fb78] __schedule at ffffffff9c90a000 #1 [ffffa9b94062fc08] schedule at ffffffff9c90a4a0 #2 [ffffa9b94062fc18] schedule_timeout at ffffffff9c90e00d #3 [ffffa9b94062fc90] wait_woken at ffffffff9c120184 #4 [ffffa9b94062fcb0] sk_stream_wait_connect at ffffffff9c75b064 #5 [ffffa9b94062fd20] mptcp_sendmsg at ffffffff9c8e801c #6 [ffffa9b94062fdc0] sock_sendmsg at ffffffff9c747324 #7 [ffffa9b94062fdd8] sock_write_iter at ffffffff9c7473c7 #8 [ffffa9b94062fe48] new_sync_write at ffffffff9c302976 #9 [ffffa9b94062fed0] vfs_write at ffffffff9c305685 #10 [ffffa9b94062ff00] ksys_write at ffffffff9c305985 #11 [ffffa9b94062ff38] do_syscall_64 at ffffffff9c004475 #12 [ffffa9b94062ff50] entry_SYSCALL_64_after_hwframe at ffffffff9ca0008c RIP: 00007f959407eaf7 RSP: 00007ffe9e95a910 RFLAGS: 00000293 RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f959407eaf7 RDX: 00000000000003e8 RSI: 0000000001785fe0 RDI: 0000000000000008 RBP: 0000000001785fe0 R8: 0000000000000000 R9: 0000000000000003 R10: 0000000000000007 R11: 0000000000000293 R12: 00000000000003e8 R13: 00007ffe9e95ae30 R14: 0000000000000000 R15: 0000000000000000 ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b Fix it ensuring that socket state is TCP_ESTABLISHED on reception of the third ack. Fixes: 1954b86016cf ("mptcp: Check connection state before attempting send") Suggested-by: Paolo Abeni Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 95007e433109..c0cef07f4382 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1049,6 +1049,10 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->can_ack, 1); + if (inet_sk_state_load(sk) != TCP_ESTABLISHED) { + inet_sk_state_store(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + } } static void mptcp_sock_graft(struct sock *sk, struct socket *parent) -- cgit v1.2.3 From b8d290525e3972b5e876b2649a42bf4081d753fe Mon Sep 17 00:00:00 2001 From: Joseph Hwang Date: Wed, 11 Mar 2020 19:20:14 -0700 Subject: Bluetooth: clean up connection in hci_cs_disconnect In bluetooth core specification 4.2, Vol 2, Part E, 7.8.9 LE Set Advertise Enable Command, it says The Controller shall continue advertising until ... or until a connection is created or ... In these cases, advertising is then disabled. Hence, advertising would be disabled before a connection is established. In current kernel implementation, advertising would be re-enabled when all connections are terminated. The correct disconnection flow looks like < HCI Command: Disconnect > HCI Event: Command Status Status: Success > HCI Event: Disconnect Complete Status: Success Specifically, the last Disconnect Complete Event would trigger a callback function hci_event.c:hci_disconn_complete_evt() to cleanup the connection and re-enable advertising when proper. However, sometimes, there might occur an exception in the controller when disconnection is being executed. 
The disconnection flow might then look like < HCI Command: Disconnect > HCI Event: Command Status Status: Unknown Connection Identifier Note that "> HCI Event: Disconnect Complete" is missing when such an exception occurs. This would result in advertising staying disabled forever since the connection in question is not cleaned up correctly. To fix the controller exception issue, we need to do some connection cleanup when the disconnect command status indicates an error. Signed-off-by: Joseph Hwang Signed-off-by: Manish Mandlik Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0908eaa7cacf..20408d386268 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2233,10 +2233,22 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { + u8 type = conn->type; + mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + /* If the disconnection failed for any reason, the upper layer + * does not retry to disconnect in current implementation. + * Hence, we need to do some basic cleanup here and re-enable + * advertising if necessary. + */ + hci_conn_del(conn); + if (type == LE_LINK) + hci_req_reenable_advertising(hdev); + } + hci_dev_unlock(hdev); } -- cgit v1.2.3 From 96298f640104e4cd9a913a6e50b0b981829b94ff Mon Sep 17 00:00:00 2001 From: Howard Chung Date: Thu, 12 Mar 2020 12:35:27 +0800 Subject: Bluetooth: L2CAP: handle l2cap config request during open state According to Core Spec Version 5.2 | Vol 3, Part A 6.1.5, the incoming L2CAP_ConfigReq should be handled during OPEN state. The section below shows the btmon trace when running L2CAP/COS/CFD/BV-12-C before and after this change. === Before === ... > ACL Data RX: Handle 256 flags 0x02 dlen 12 #22 L2CAP: Connection Request (0x02) ident 2 len 4 PSM: 1 (0x0001) Source CID: 65 < ACL Data TX: Handle 256 flags 0x00 dlen 16 #23 L2CAP: Connection Response (0x03) ident 2 len 8 Destination CID: 64 Source CID: 65 Result: Connection successful (0x0000) Status: No further information available (0x0000) < ACL Data TX: Handle 256 flags 0x00 dlen 12 #24 L2CAP: Configure Request (0x04) ident 2 len 4 Destination CID: 65 Flags: 0x0000 > HCI Event: Number of Completed Packets (0x13) plen 5 #25 Num handles: 1 Handle: 256 Count: 1 > HCI Event: Number of Completed Packets (0x13) plen 5 #26 Num handles: 1 Handle: 256 Count: 1 > ACL Data RX: Handle 256 flags 0x02 dlen 16 #27 L2CAP: Configure Request (0x04) ident 3 len 8 Destination CID: 64 Flags: 0x0000 Option: Unknown (0x10) [hint] 01 00 .. < ACL Data TX: Handle 256 flags 0x00 dlen 18 #28 L2CAP: Configure Response (0x05) ident 3 len 10 Source CID: 65 Flags: 0x0000 Result: Success (0x0000) Option: Maximum Transmission Unit (0x01) [mandatory] MTU: 672 > HCI Event: Number of Completed Packets (0x13) plen 5 #29 Num handles: 1 Handle: 256 Count: 1 > ACL Data RX: Handle 256 flags 0x02 dlen 14 #30 L2CAP: Configure Response (0x05) ident 2 len 6 Source CID: 64 Flags: 0x0000 Result: Success (0x0000) > ACL Data RX: Handle 256 flags 0x02 dlen 20 #31 L2CAP: Configure Request (0x04) ident 3 len 12 Destination CID: 64 Flags: 0x0000 Option: Unknown (0x10) [hint] 01 00 91 02 11 11 ...... 
< ACL Data TX: Handle 256 flags 0x00 dlen 14 #32 L2CAP: Command Reject (0x01) ident 3 len 6 Reason: Invalid CID in request (0x0002) Destination CID: 64 Source CID: 65 > HCI Event: Number of Completed Packets (0x13) plen 5 #33 Num handles: 1 Handle: 256 Count: 1 ... === After === ... > ACL Data RX: Handle 256 flags 0x02 dlen 12 #22 L2CAP: Connection Request (0x02) ident 2 len 4 PSM: 1 (0x0001) Source CID: 65 < ACL Data TX: Handle 256 flags 0x00 dlen 16 #23 L2CAP: Connection Response (0x03) ident 2 len 8 Destination CID: 64 Source CID: 65 Result: Connection successful (0x0000) Status: No further information available (0x0000) < ACL Data TX: Handle 256 flags 0x00 dlen 12 #24 L2CAP: Configure Request (0x04) ident 2 len 4 Destination CID: 65 Flags: 0x0000 > HCI Event: Number of Completed Packets (0x13) plen 5 #25 Num handles: 1 Handle: 256 Count: 1 > HCI Event: Number of Completed Packets (0x13) plen 5 #26 Num handles: 1 Handle: 256 Count: 1 > ACL Data RX: Handle 256 flags 0x02 dlen 16 #27 L2CAP: Configure Request (0x04) ident 3 len 8 Destination CID: 64 Flags: 0x0000 Option: Unknown (0x10) [hint] 01 00 .. < ACL Data TX: Handle 256 flags 0x00 dlen 18 #28 L2CAP: Configure Response (0x05) ident 3 len 10 Source CID: 65 Flags: 0x0000 Result: Success (0x0000) Option: Maximum Transmission Unit (0x01) [mandatory] MTU: 672 > HCI Event: Number of Completed Packets (0x13) plen 5 #29 Num handles: 1 Handle: 256 Count: 1 > ACL Data RX: Handle 256 flags 0x02 dlen 14 #30 L2CAP: Configure Response (0x05) ident 2 len 6 Source CID: 64 Flags: 0x0000 Result: Success (0x0000) > ACL Data RX: Handle 256 flags 0x02 dlen 20 #31 L2CAP: Configure Request (0x04) ident 3 len 12 Destination CID: 64 Flags: 0x0000 Option: Unknown (0x10) [hint] 01 00 91 02 11 11 ..... < ACL Data TX: Handle 256 flags 0x00 dlen 18 #32 L2CAP: Configure Response (0x05) ident 3 len 10 Source CID: 65 Flags: 0x0000 Result: Success (0x0000) Option: Maximum Transmission Unit (0x01) [mandatory] MTU: 672 < ACL Data TX: Handle 256 flags 0x00 dlen 12 #33 L2CAP: Configure Request (0x04) ident 3 len 4 Destination CID: 65 Flags: 0x0000 > HCI Event: Number of Completed Packets (0x13) plen 5 #34 Num handles: 1 Handle: 256 Count: 1 > HCI Event: Number of Completed Packets (0x13) plen 5 #35 Num handles: 1 Handle: 256 Count: 1 ... Signed-off-by: Howard Chung Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 697c0f7f2c1a..5e6e35ab44dd 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4300,7 +4300,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, return 0; } - if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) { + if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 && + chan->state != BT_CONNECTED) { cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, chan->dcid); goto unlock; -- cgit v1.2.3 From b354e6c10eaf97a1f5aff4839795bd32ad32f0f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Mar 2020 14:33:09 +0300 Subject: Bluetooth: L2CAP: Fix a condition in l2cap_sock_recvmsg() Smatch complains about the indenting: net/bluetooth/l2cap_sock.c:1027 l2cap_sock_recvmsg() warn: inconsistent indenting It looks like this is supposed to be an "else if" condition. 
Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Dan Carpenter Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 0c636be3469e..40fb10b591bd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1024,7 +1024,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, sk->sk_state = BT_CONNECTED; pi->chan->state = BT_CONNECTED; __l2cap_ecred_conn_rsp_defer(pi->chan); - } if (bdaddr_type_is_le(pi->chan->src_type)) { + } else if (bdaddr_type_is_le(pi->chan->src_type)) { sk->sk_state = BT_CONNECTED; pi->chan->state = BT_CONNECTED; __l2cap_le_connect_rsp_defer(pi->chan); -- cgit v1.2.3 From 7c4046b1c53bba3a0315f04bb0bb5f36888a747b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 12 Mar 2020 18:57:54 +0100 Subject: Revert "net: sched: make newly activated qdiscs visible" This reverts commit 4cda75275f9f89f9485b0ca4d6950c95258a9bce from net-next. Brown bag time. Michal noticed that this change doesn't work at all when netif_set_real_num_tx_queues() gets called prior to an initial dev_activate(), as for instance igb does. Doing so dies with: [ 40.579142] BUG: kernel NULL pointer dereference, address: 0000000000000400 [ 40.586922] #PF: supervisor read access in kernel mode [ 40.592668] #PF: error_code(0x0000) - not-present page [ 40.598405] PGD 0 P4D 0 [ 40.601234] Oops: 0000 [#1] PREEMPT SMP PTI [ 40.605909] CPU: 18 PID: 1681 Comm: wickedd Tainted: G E 5.6.0-rc3-ethnl.50-default #1 [ 40.616205] Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.R3.27.D685.1305151734 05/15/2013 [ 40.627377] RIP: 0010:qdisc_hash_add.part.22+0x2e/0x90 [ 40.633115] Code: 00 55 53 89 f5 48 89 fb e8 2f 9b fb ff 85 c0 74 44 48 8b 43 40 48 8b 08 69 43 38 47 86 c8 61 c1 e8 1c 48 83 e8 80 48 8d 14 c1 <48> 8b 04 c1 48 8d 4b 28 48 89 53 30 48 89 43 28 48 85 c0 48 89 0a [ 40.654080] RSP: 0018:ffffb879864934d8 EFLAGS: 00010203 [ 40.659914] RAX: 0000000000000080 RBX: ffffffffb8328d80 RCX: 0000000000000000 [ 40.667882] RDX: 0000000000000400 RSI: 0000000000000000 RDI: ffffffffb831faa0 [ 40.675849] RBP: 0000000000000000 R08: ffffa0752c8b9088 R09: ffffa0752c8b9208 [ 40.683816] R10: 0000000000000006 R11: 0000000000000000 R12: ffffa0752d734000 [ 40.691783] R13: 0000000000000008 R14: 0000000000000000 R15: ffffa07113c18000 [ 40.699750] FS: 00007f94548e5880(0000) GS:ffffa0752e980000(0000) knlGS:0000000000000000 [ 40.708782] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 40.715189] CR2: 0000000000000400 CR3: 000000082b6ae006 CR4: 00000000001606e0 [ 40.723156] Call Trace: [ 40.725888] dev_qdisc_set_real_num_tx_queues+0x61/0x90 [ 40.731725] netif_set_real_num_tx_queues+0x94/0x1d0 [ 40.737286] __igb_open+0x19a/0x5d0 [igb] [ 40.741767] __dev_open+0xbb/0x150 [ 40.745567] __dev_change_flags+0x157/0x1a0 [ 40.750240] dev_change_flags+0x23/0x60 [...] Fixes: 4cda75275f9f ("net: sched: make newly activated qdiscs visible") Reported-by: Michal Kubecek CC: Michal Kubecek CC: Eric Dumazet CC: Jamal Hadi Salim CC: Cong Wang CC: Jiri Pirko Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 6 ------ net/core/dev.c | 1 - net/sched/sch_generic.c | 21 --------------------- 3 files changed, 28 deletions(-) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7bfc45c5b602..151208704ed2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -153,11 +153,6 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) return !READ_ONCE(qdisc->q.qlen); } -static inline bool qdisc_hashed(struct Qdisc *qdisc) -{ - return hash_hashed(&qdisc->hash); -} - static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { @@ -634,7 +629,6 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); -void dev_qdisc_set_real_num_tx_queues(struct net_device *dev); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index ccc03abeee52..25dab1598803 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2875,7 +2875,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) netif_setup_tc(dev, txq); dev->real_num_tx_queues = txq; - dev_qdisc_set_real_num_tx_queues(dev); if (disabling) { synchronize_net(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 36a40ebcf0ee..6c9595f1048a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1268,27 +1268,6 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev) return ret; } -void dev_qdisc_set_real_num_tx_queues(struct net_device *dev) -{ -#ifdef CONFIG_NET_SCHED - struct Qdisc *sch = dev->qdisc; - unsigned int ntx; - - if (!sch) - return; - - ASSERT_RTNL(); - - for (ntx = 0; ntx < dev->real_num_tx_queues; ntx++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); - struct Qdisc *qdisc = dev_queue->qdisc; - - if (qdisc && !qdisc_hashed(qdisc)) - qdisc_hash_add(qdisc, false); - } -#endif -} - static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc) -- cgit v1.2.3 From 16f6c2518f9e0347eb54d368473ebd0904ac4298 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Mar 2020 17:05:24 +0900 Subject: tcp: Remove unnecessary conditions in inet_csk_bind_conflict(). When we get an ephemeral port, the relax is false, so the SO_REUSEADDR conditions may be evaluated twice. We do not need to check the conditions again. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- net/ipv4/inet_connection_sock.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a4db79b1b643..2e9549f49a82 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -146,17 +146,15 @@ static int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && + if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { + if (!relax && + inet_rcv_saddr_equal(sk, sk2, true)) + break; + } else if (!reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } -- cgit v1.2.3 From 4b01a9674231a97553a55456d883f584e948a78d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Mar 2020 17:05:25 +0900 Subject: tcp: bind(0) remove the SO_REUSEADDR restriction when ephemeral ports are exhausted. Commit aacd9289af8b82f5fb01bcdd53d0e3406d1333c7 ("tcp: bind() use stronger condition for bind_conflict") introduced a restriction to forbid to bind SO_REUSEADDR enabled sockets to the same (addr, port) tuple in order to assign ports dispersedly so that we can connect to the same remote host. The change results in accelerating port depletion so that we fail to bind sockets to the same local port even if we want to connect to the different remote hosts. You can reproduce this issue by following instructions below. 1. # sysctl -w net.ipv4.ip_local_port_range="32768 32768" 2. set SO_REUSEADDR to two sockets. 3. bind two sockets to (localhost, 0) and the latter fails. Therefore, when ephemeral ports are exhausted, bind(0) should fallback to the legacy behaviour to enable the SO_REUSEADDR option and make it possible to connect to different remote (addr, port) tuples. This patch allows us to bind SO_REUSEADDR enabled sockets to the same (addr, port) only when net.ipv4.ip_autobind_reuse is set 1 and all ephemeral ports are exhausted. This also allows connect() and listen() to share ports in the following way and may break some applications. So the ip_autobind_reuse is 0 by default and disables the feature. 1. setsockopt(sk1, SO_REUSEADDR) 2. setsockopt(sk2, SO_REUSEADDR) 3. bind(sk1, saddr, 0) 4. bind(sk2, saddr, 0) 5. connect(sk1, daddr) 6. listen(sk2) If it is set 1, we can fully utilize the 4-tuples, but we should use IP_BIND_ADDRESS_NO_PORT for bind()+connect() as possible. The notable thing is that if all sockets bound to the same port have both SO_REUSEADDR and SO_REUSEPORT enabled, we can bind sockets to an ephemeral port and also do listen(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/inet_connection_sock.c | 10 +++++++++- net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5f53faff4e25..ee961d322d93 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -958,6 +958,15 @@ ip_nonlocal_bind - BOOLEAN which can be quite useful - but may break some applications. Default: 0 +ip_autobind_reuse - BOOLEAN + By default, bind() does not select the ports automatically even if + the new socket and all sockets bound to the port have SO_REUSEADDR. + ip_autobind_reuse allows bind() to reuse the port and this is useful + when you use bind()+connect(), but may break some applications. + The preferred solution is to use IP_BIND_ADDRESS_NO_PORT and this + option should only be set by experts. + Default: 0 + ip_dynaddr - BOOLEAN If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 08b98414d94e..154b8f01499b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -101,6 +101,7 @@ struct netns_ipv4 { int sysctl_ip_fwd_use_pmtu; int sysctl_ip_fwd_update_priority; int sysctl_ip_nonlocal_bind; + int sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2e9549f49a82..497366b631f3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -174,12 +174,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * int port = 0; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool relax = false; int i, low, high, attempt_half; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); +ports_exhausted: attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 
1 : 0; other_half_scan: inet_get_local_port_range(net, &low, &high); @@ -217,7 +219,7 @@ other_parity_scan: inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, false, false)) + if (!inet_csk_bind_conflict(sk, tb, relax, false)) goto success; goto next_port; } @@ -237,6 +239,12 @@ next_port: attempt_half = 2; goto other_half_scan; } + + if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + /* We still have a chance to connect to different destinations */ + relax = true; + goto ports_exhausted; + } return NULL; success: *port_ret = port; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d9531b4b33f2..81b267e990a1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -763,6 +763,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_autobind_reuse", + .data = &init_net.ipv4.sysctl_ip_autobind_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, -- cgit v1.2.3 From 335759211a327d61244580070d74f55561c35895 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Mar 2020 17:05:26 +0900 Subject: tcp: Forbid to bind more than one sockets haveing SO_REUSEADDR and SO_REUSEPORT per EUID. If there is no TCP_LISTEN socket on a ephemeral port, we can bind multiple sockets having SO_REUSEADDR to the same port. Then if all sockets bound to the port have also SO_REUSEPORT enabled and have the same EUID, all of them can be listened. This is not safe. Let's say, an application has root privilege and binds sockets to an ephemeral port with both of SO_REUSEADDR and SO_REUSEPORT. When none of sockets is not listened yet, a malicious user can use sudo, exhaust ephemeral ports, and bind sockets to the same ephemeral port, so he or she can call listen and steal the port. To prevent this issue, we must not bind more than one sockets that have the same EUID and both of SO_REUSEADDR and SO_REUSEPORT. On the other hand, if the sockets have different EUIDs, the issue above does not occur. After sockets with different EUIDs are bound to the same port and one of them is listened, no more socket can be listened. This is because the condition below is evaluated true and listen() for the second socket fails. } else if (!reuseport_ok || !reuseport || !sk2->sk_reuseport || rcu_access_pointer(sk->sk_reuseport_cb) || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } Therefore, on the same port, we cannot do listen() for multiple sockets with different EUIDs and any other listen syscalls fail, so the problem does not happen. In this case, we can still call connect() for other sockets that cannot be listened, so we have to succeed to call bind() in order to fully utilize 4-tuples. Summarizing the above, we should be able to bind only one socket having SO_REUSEADDR and SO_REUSEPORT per EUID. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- net/ipv4/inet_connection_sock.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 497366b631f3..3b4f81790e3e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, { struct sock *sk2; bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; + bool reuseport = !!sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -148,10 +148,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - if (!relax && + if ((!relax || + (!reuseport_ok && + reuseport && sk2->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(uid, sock_i_uid(sk2))))) && inet_rcv_saddr_equal(sk, sk2, true)) break; - } else if (!reuseport || !sk2->sk_reuseport || + } else if (!reuseport_ok || + !reuseport || !sk2->sk_reuseport || rcu_access_pointer(sk->sk_reuseport_cb) || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2)))) { -- cgit v1.2.3 From 978703f42549ac7d1a354bafbfc346a3ccf15f0d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:05 +0200 Subject: netfilter: flowtable: Add API for registering to flow table events Let drivers to add their cb allowing them to receive flow offload events of type TC_SETUP_CLSFLOWER (REPLACE/DEL/STATS) for flows managed by the flow table. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/netfilter/nf_flow_table.h | 6 +++++ net/netfilter/nf_flow_table_core.c | 47 +++++++++++++++++++++++++++++++++++ net/netfilter/nf_flow_table_offload.c | 4 +++ 3 files changed, 57 insertions(+) (limited to 'net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index e0f709d9d547..d9d0945b696e 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -44,6 +44,7 @@ struct nf_flowtable { struct delayed_work gc_work; unsigned int flags; struct flow_block flow_block; + struct mutex flow_block_lock; /* Guards flow_block */ possible_net_t net; }; @@ -129,6 +130,11 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); +int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv); +void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv); + int flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8af28e10b4e6..4af0327992cf 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -372,6 +372,50 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } +int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + int err = 0; + + mutex_lock(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + err = -EEXIST; + goto unlock; + } + 
+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); + if (IS_ERR(block_cb)) { + err = PTR_ERR(block_cb); + goto unlock; + } + + list_add_tail(&block_cb->list, &block->cb_list); + +unlock: + mutex_unlock(&flow_table->flow_block_lock); + return err; +} +EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb); + +void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + + mutex_lock(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) + list_del(&block_cb->list); + else + WARN_ON(true); + mutex_unlock(&flow_table->flow_block_lock); +} +EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); + static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) { @@ -494,6 +538,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); + mutex_init(&flowtable->flow_block_lock); err = rhashtable_init(&flowtable->rhashtable, &nf_flow_offload_rhash_params); @@ -550,11 +595,13 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_lock(&flowtable_lock); list_del(&flow_table->list); mutex_unlock(&flowtable_lock); + cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); + mutex_destroy(&flow_table->flow_block_lock); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 06f00cdc3891..f5afdf023bb9 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -610,6 +610,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, if (cmd == FLOW_CLS_REPLACE) cls_flow.rule = flow_rule->rule; + mutex_lock(&flowtable->flow_block_lock); list_for_each_entry(block_cb, block_cb_list, list) { err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); @@ -618,6 +619,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, i++; } + mutex_unlock(&flowtable->flow_block_lock); return i; } @@ -692,8 +694,10 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, FLOW_CLS_STATS, &offload->flow->tuplehash[dir].tuple, &extack); + mutex_lock(&flowtable->flow_block_lock); list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); + mutex_unlock(&flowtable->flow_block_lock); memcpy(stats, &cls_flow.stats, sizeof(*stats)); } -- cgit v1.2.3 From 9c26ba9b1f453a0c86b26e9ab5e8efedcb4470d8 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:06 +0200 Subject: net/sched: act_ct: Instantiate flow table entry actions NF flow table API associate 5-tuple rule with an action list by calling the flow table type action() CB to fill the rule's actions. In action CB of act_ct, populate the ct offload entry actions with a new ct_metadata action. Initialize the ct_metadata with the ct mark, label and zone information. If ct nat was performed, then also append the relevant packet mangle actions (e.g. ipv4/ipv6/tcp/udp header rewrites). Drivers that offload the ft entries may match on the 5-tuple and perform the action list. 
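[Editorial sketch, not part of this patch: a driver that registered a callback via nf_flow_table_offload_add_cb() receives these rules as TC_SETUP_CLSFLOWER commands and could walk the action list filled here roughly as below. The function name and the hardware-programming steps are placeholders.]

  #include <linux/errno.h>
  #include <net/flow_offload.h>

  static int example_ft_setup_cb(enum tc_setup_type type, void *type_data,
                                 void *cb_priv)
  {
          struct flow_cls_offload *f = type_data;
          const struct flow_action_entry *act;
          struct flow_rule *rule;
          int i;

          if (type != TC_SETUP_CLSFLOWER)
                  return -EOPNOTSUPP;

          switch (f->command) {
          case FLOW_CLS_REPLACE:
                  rule = flow_cls_offload_flow_rule(f);
                  flow_action_for_each(i, act, &rule->action) {
                          switch (act->id) {
                          case FLOW_ACTION_CT_METADATA:
                                  /* program restore of ct mark/labels in hw */
                                  break;
                          case FLOW_ACTION_MANGLE:
                                  /* program the NAT header rewrite in hw */
                                  break;
                          default:
                                  return -EOPNOTSUPP;
                          }
                  }
                  return 0;
          default:
                  return -EOPNOTSUPP;
          }
  }

[Note that nf_flow_offload_tuple() above only populates the rule for FLOW_CLS_REPLACE, so DEL and STATS commands are handled without touching it in this sketch.]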
Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Reviewed-by: Edward Cree Signed-off-by: David S. Miller --- include/net/flow_offload.h | 5 + include/net/netfilter/nf_flow_table.h | 23 ++++ net/netfilter/nf_flow_table_offload.c | 23 ---- net/sched/act_ct.c | 207 ++++++++++++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index d1b1e4aa310a..ba433497789b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -136,6 +136,7 @@ enum flow_action_id { FLOW_ACTION_SAMPLE, FLOW_ACTION_POLICE, FLOW_ACTION_CT, + FLOW_ACTION_CT_METADATA, FLOW_ACTION_MPLS_PUSH, FLOW_ACTION_MPLS_POP, FLOW_ACTION_MPLS_MANGLE, @@ -225,6 +226,10 @@ struct flow_action_entry { int action; u16 zone; } ct; + struct { + u32 mark; + u32 labels[4]; + } ct_metadata; struct { /* FLOW_ACTION_MPLS_PUSH */ u32 label; __be16 proto; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d9d0945b696e..c2d5cdd9904d 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -16,6 +16,29 @@ struct nf_flow_rule; struct flow_offload; enum flow_offload_tuple_dir; +struct nf_flow_key { + struct flow_dissector_key_meta meta; + struct flow_dissector_key_control control; + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_tcp tcp; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct nf_flow_match { + struct flow_dissector dissector; + struct nf_flow_key key; + struct nf_flow_key mask; +}; + +struct nf_flow_rule { + struct nf_flow_match match; + struct flow_rule *rule; +}; + struct nf_flowtable_type { struct list_head list; int family; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index f5afdf023bb9..42b73a084a63 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -23,29 +23,6 @@ struct flow_offload_work { struct flow_offload *flow; }; -struct nf_flow_key { - struct flow_dissector_key_meta meta; - struct flow_dissector_key_control control; - struct flow_dissector_key_basic basic; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; - struct flow_dissector_key_tcp tcp; - struct flow_dissector_key_ports tp; -} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
*/ - -struct nf_flow_match { - struct flow_dissector dissector; - struct nf_flow_key key; - struct nf_flow_key mask; -}; - -struct nf_flow_rule { - struct nf_flow_match match; - struct flow_rule *rule; -}; - #define NF_FLOW_DISSECTOR(__match, __type, __field) \ (__match)->dissector.offset[__type] = \ offsetof(struct nf_flow_key, __field) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 3d9e678d7d53..9c522bc51f68 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -55,7 +55,214 @@ static const struct rhashtable_params zones_params = { .automatic_shrinking = true, }; +static struct flow_action_entry * +tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action) +{ + int i = flow_action->num_entries++; + + return &flow_action->entries[i]; +} + +static void tcf_ct_add_mangle_action(struct flow_action *action, + enum flow_action_mangle_base htype, + u32 offset, + u32 mask, + u32 val) +{ + struct flow_action_entry *entry; + + entry = tcf_ct_flow_table_flow_action_get_next(action); + entry->id = FLOW_ACTION_MANGLE; + entry->mangle.htype = htype; + entry->mangle.mask = ~mask; + entry->mangle.offset = offset; + entry->mangle.val = val; +} + +/* The following nat helper functions check if the inverted reverse tuple + * (target) is different then the current dir tuple - meaning nat for ports + * and/or ip is needed, and add the relevant mangle actions. + */ +static void +tcf_ct_flow_table_add_action_nat_ipv4(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3))) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4, + offsetof(struct iphdr, saddr), + 0xFFFFFFFF, + be32_to_cpu(target.src.u3.ip)); + if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3))) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4, + offsetof(struct iphdr, daddr), + 0xFFFFFFFF, + be32_to_cpu(target.dst.u3.ip)); +} + +static void +tcf_ct_add_ipv6_addr_mangle_action(struct flow_action *action, + union nf_inet_addr *addr, + u32 offset) +{ + int i; + + for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP6, + i * sizeof(u32) + offset, + 0xFFFFFFFF, be32_to_cpu(addr->ip6[i])); +} + +static void +tcf_ct_flow_table_add_action_nat_ipv6(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3))) + tcf_ct_add_ipv6_addr_mangle_action(action, &target.src.u3, + offsetof(struct ipv6hdr, + saddr)); + if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3))) + tcf_ct_add_ipv6_addr_mangle_action(action, &target.dst.u3, + offsetof(struct ipv6hdr, + daddr)); +} + +static void +tcf_ct_flow_table_add_action_nat_tcp(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple target, + struct flow_action *action) +{ + __be16 target_src = target.src.u.tcp.port; + __be16 target_dst = target.dst.u.tcp.port; + + if (target_src != tuple->src.u.tcp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct tcphdr, source), + 0xFFFF, be16_to_cpu(target_src)); + if (target_dst != tuple->dst.u.tcp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct tcphdr, dest), + 0xFFFF, be16_to_cpu(target_dst)); +} + +static void +tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple, + struct 
nf_conntrack_tuple target, + struct flow_action *action) +{ + __be16 target_src = target.src.u.udp.port; + __be16 target_dst = target.dst.u.udp.port; + + if (target_src != tuple->src.u.udp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct udphdr, source), + 0xFFFF, be16_to_cpu(target_src)); + if (target_dst != tuple->dst.u.udp.port) + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + offsetof(struct udphdr, dest), + 0xFFFF, be16_to_cpu(target_dst)); +} + +static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, + enum ip_conntrack_dir dir, + struct flow_action *action) +{ + struct nf_conn_labels *ct_labels; + struct flow_action_entry *entry; + u32 *act_ct_labels; + + entry = tcf_ct_flow_table_flow_action_get_next(action); + entry->id = FLOW_ACTION_CT_METADATA; +#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) + entry->ct_metadata.mark = ct->mark; +#endif + + act_ct_labels = entry->ct_metadata.labels; + ct_labels = nf_ct_labels_find(ct); + if (ct_labels) + memcpy(act_ct_labels, ct_labels->bits, NF_CT_LABELS_MAX_SIZE); + else + memset(act_ct_labels, 0, NF_CT_LABELS_MAX_SIZE); +} + +static int tcf_ct_flow_table_add_action_nat(struct net *net, + struct nf_conn *ct, + enum ip_conntrack_dir dir, + struct flow_action *action) +{ + const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; + struct nf_conntrack_tuple target; + + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + + switch (tuple->src.l3num) { + case NFPROTO_IPV4: + tcf_ct_flow_table_add_action_nat_ipv4(tuple, target, + action); + break; + case NFPROTO_IPV6: + tcf_ct_flow_table_add_action_nat_ipv6(tuple, target, + action); + break; + default: + return -EOPNOTSUPP; + } + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + tcf_ct_flow_table_add_action_nat_tcp(tuple, target, action); + break; + case IPPROTO_UDP: + tcf_ct_flow_table_add_action_nat_udp(tuple, target, action); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int tcf_ct_flow_table_fill_actions(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir tdir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action *action = &flow_rule->rule->action; + int num_entries = action->num_entries; + struct nf_conn *ct = flow->ct; + enum ip_conntrack_dir dir; + int i, err; + + switch (tdir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + dir = IP_CT_DIR_ORIGINAL; + break; + case FLOW_OFFLOAD_DIR_REPLY: + dir = IP_CT_DIR_REPLY; + break; + default: + return -EOPNOTSUPP; + } + + err = tcf_ct_flow_table_add_action_nat(net, ct, dir, action); + if (err) + goto err_nat; + + tcf_ct_flow_table_add_action_meta(ct, dir, action); + return 0; + +err_nat: + /* Clear filled actions */ + for (i = num_entries; i < action->num_entries; i++) + memset(&action->entries[i], 0, sizeof(action->entries[i])); + action->num_entries = num_entries; + + return err; +} + static struct nf_flowtable_type flowtable_ct = { + .action = tcf_ct_flow_table_fill_actions, .owner = THIS_MODULE, }; -- cgit v1.2.3 From 30b0cf90c6dd82e7ebb3fcb5ba8447f1baeb80be Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:07 +0200 Subject: net/sched: act_ct: Support restoring conntrack info on skbs Provide an API to restore the ct state pointer. This may be used by drivers to restore the ct state if they miss in tc chain after they already did the hardware connection tracking action (ct_metadata action). 
For example, consider the following rule on chain 0 that is in_hw, however chain 1 is not_in_hw: $ tc filter add dev ... chain 0 ... \ flower ... action ct pipe action goto chain 1 Packets of a flow offloaded (via nf flow table offload) by the driver hit this rule in hardware, will be marked with the ct metadata action (mark, label, zone) that does the equivalent of the software ct action, and when the packet jumps to hardware chain 1, there would be a miss. CT was already processed in hardware. Therefore, the driver's miss handling should restore the ct state on the skb, using the provided API, and continue the packet processing in chain 1. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 + include/net/tc_act/tc_ct.h | 7 +++++++ net/sched/act_ct.c | 16 ++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index ba433497789b..a039c900c384 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -227,6 +227,7 @@ struct flow_action_entry { u16 zone; } ct; struct { + unsigned long cookie; u32 mark; u32 labels[4]; } ct_metadata; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index cf3492e2a6a4..735da5912974 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -55,6 +55,13 @@ static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } #endif /* CONFIG_NF_CONNTRACK */ +#if IS_ENABLED(CONFIG_NET_ACT_CT) +void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie); +#else +static inline void +tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { } +#endif + static inline bool is_tcf_ct(const struct tc_action *a) { #if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9c522bc51f68..31eef8a847d2 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -170,6 +170,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, { struct nf_conn_labels *ct_labels; struct flow_action_entry *entry; + enum ip_conntrack_info ctinfo; u32 *act_ct_labels; entry = tcf_ct_flow_table_flow_action_get_next(action); @@ -177,6 +178,10 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) entry->ct_metadata.mark = ct->mark; #endif + ctinfo = dir == IP_CT_DIR_ORIGINAL ? 
IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; + /* aligns with the CT reference on the SKB nf_ct_set */ + entry->ct_metadata.cookie = (unsigned long)ct | ctinfo; act_ct_labels = entry->ct_metadata.labels; ct_labels = nf_ct_labels_find(ct); @@ -1530,6 +1535,17 @@ static void __exit ct_cleanup_module(void) destroy_workqueue(act_ct_wq); } +void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) +{ + enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; + struct nf_conn *ct; + + ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); +} +EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb); + module_init(ct_init_module); module_exit(ct_cleanup_module); MODULE_AUTHOR("Paul Blakey "); -- cgit v1.2.3 From 8b3646d6e0c4ca4ba5615facaef1312d6d40d123 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:08 +0200 Subject: net/sched: act_ct: Support refreshing the flow table entries If driver deleted an FT entry, a FT failed to offload, or registered to the flow table after flows were already added, we still get packets in software. For those packets, while restoring the ct state from the flow table entry, refresh it's hardware offload. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/netfilter/nf_flow_table.h | 3 +++ net/netfilter/nf_flow_table_core.c | 13 +++++++++++++ net/netfilter/nf_flow_table_ip.c | 15 ++------------- net/sched/act_ct.c | 1 + 4 files changed, 19 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index c2d5cdd9904d..6890f1ca3e31 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -162,6 +162,9 @@ int flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +void flow_offload_refresh(struct nf_flowtable *flow_table, + struct flow_offload *flow); + struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); void nf_flow_table_cleanup(struct net_device *dev); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 4af0327992cf..9a477bd563b7 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -252,6 +252,19 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); +void flow_offload_refresh(struct nf_flowtable *flow_table, + struct flow_offload *flow) +{ + flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; + + if (likely(!nf_flowtable_hw_offload(flow_table) || + !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags))) + return; + + nf_flow_offload_add(flow_table, flow); +} +EXPORT_SYMBOL_GPL(flow_offload_refresh); + static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 9e563fd3da0f..5272721080f8 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -232,13 +232,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static bool nf_flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow) -{ - return nf_flowtable_hw_offload(flow_table) && - 
test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags); -} - unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -279,8 +272,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) return NF_ACCEPT; - if (unlikely(nf_flow_offload_refresh(flow_table, flow))) - nf_flow_offload_add(flow_table, flow); + flow_offload_refresh(flow_table, flow); if (nf_flow_offload_dst_check(&rt->dst)) { flow_offload_teardown(flow); @@ -290,7 +282,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) return NF_DROP; - flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; iph = ip_hdr(skb); ip_decrease_ttl(iph); skb->tstamp = 0; @@ -508,8 +499,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, sizeof(*ip6h))) return NF_ACCEPT; - if (unlikely(nf_flow_offload_refresh(flow_table, flow))) - nf_flow_offload_add(flow_table, flow); + flow_offload_refresh(flow_table, flow); if (nf_flow_offload_dst_check(&rt->dst)) { flow_offload_teardown(flow); @@ -522,7 +512,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (nf_flow_nat_ipv6(flow, skb, dir) < 0) return NF_DROP; - flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; skb->tstamp = 0; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 31eef8a847d2..16fc731e5b03 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -531,6 +531,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; + flow_offload_refresh(nf_ft, flow); nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); -- cgit v1.2.3 From edd5861e597b7ec2fae2fa3bc8180164045b5075 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:09 +0200 Subject: net/sched: act_ct: Enable hardware offload of flow table entires Pass the zone's flow table instance on the flow action to the drivers. Thus, allowing drivers to register FT add/del/stats callbacks. Finally, enable hardware offload on the flow table instance. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_offload.h | 1 + include/net/tc_act/tc_ct.h | 10 ++++++++++ net/sched/act_ct.c | 2 ++ net/sched/cls_api.c | 1 + 4 files changed, 14 insertions(+) (limited to 'net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a039c900c384..ceaa3628796d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -225,6 +225,7 @@ struct flow_action_entry { struct { /* FLOW_ACTION_CT */ int action; u16 zone; + struct nf_flowtable *flow_table; } ct; struct { unsigned long cookie; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 735da5912974..79654bcb9a29 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -27,6 +27,7 @@ struct tcf_ct_params { struct rcu_head rcu; struct tcf_ct_flow_table *ct_ft; + struct nf_flowtable *nf_ft; }; struct tcf_ct { @@ -50,9 +51,18 @@ static inline int tcf_ct_action(const struct tc_action *a) return to_ct_params(a)->ct_action; } +static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) +{ + return to_ct_params(a)->nf_ft; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } +static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 16fc731e5b03..56b66d215a89 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -292,6 +292,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) goto err_insert; ct_ft->nf_ft.type = &flowtable_ct; + ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD; err = nf_flow_table_init(&ct_ft->nf_ft); if (err) goto err_init; @@ -299,6 +300,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) __module_get(THIS_MODULE); out_unlock: params->ct_ft = ct_ft; + params->nf_ft = &ct_ft->nf_ft; mutex_unlock(&zones_mutex); return 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2b5b4ebda842..2046102a763e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3636,6 +3636,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + entry->ct.flow_table = tcf_ct_ft(act); } else if (is_tcf_mpls(act)) { switch (tcf_mpls_action(act)) { case TCA_MPLS_ACT_PUSH: -- cgit v1.2.3 From ee1c45e87595a1c85d8e348aa896b24f24a356ce Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:13 +0200 Subject: flow_offload: Add flow_match_ct to get rule ct match Add relevant getter for ct info dissector. Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/flow_offload.h | 6 ++++++ net/core/flow_offload.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index ceaa3628796d..efd8d47f6997 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -69,6 +69,10 @@ struct flow_match_enc_opts { struct flow_dissector_key_enc_opts *key, *mask; }; +struct flow_match_ct { + struct flow_dissector_key_ct *key, *mask; +}; + struct flow_rule; void flow_rule_match_meta(const struct flow_rule *rule, @@ -111,6 +115,8 @@ void flow_rule_match_enc_keyid(const struct flow_rule *rule, struct flow_match_enc_keyid *out); void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); +void flow_rule_match_ct(const struct flow_rule *rule, + struct flow_match_ct *out); enum flow_action_id { FLOW_ACTION_ACCEPT = 0, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d21348202ba6..7440e6117c81 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -188,6 +188,13 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie) } EXPORT_SYMBOL(flow_action_cookie_destroy); +void flow_rule_match_ct(const struct flow_rule *rule, + struct flow_match_ct *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CT, out); +} +EXPORT_SYMBOL(flow_rule_match_ct); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) -- cgit v1.2.3 From 98130546da115a8aab663bc0c0971cc0bcc50542 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:38 +0100 Subject: ethtool: rename ethnl_parse_header() to ethnl_parse_header_dev_get() Andrew Lunn pointed out that even if it's documented that ethnl_parse_header() takes reference to network device if it fills it into the target structure, its name doesn't make it apparent so that corresponding dev_put() looks like mismatched. Rename the function ethnl_parse_header_dev_get() to indicate that it takes a reference. Suggested-by: Andrew Lunn Signed-off-by: Michal Kubecek Signed-off-by: David S. 
Miller --- net/ethtool/debug.c | 6 ++++-- net/ethtool/linkinfo.c | 6 ++++-- net/ethtool/linkmodes.c | 6 ++++-- net/ethtool/netlink.c | 12 ++++++------ net/ethtool/netlink.h | 7 ++++--- net/ethtool/wol.c | 5 +++-- 6 files changed, 25 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index aaef4843e6ba..87f288ee20c8 100644 --- a/net/ethtool/debug.c +++ b/net/ethtool/debug.c @@ -102,8 +102,10 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_DEBUG_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_DEBUG_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 5d16cb4e8693..2df420068cbb 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -121,8 +121,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKINFO_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_LINKINFO_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index f049b97072fe..cb29cc8c5960 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -334,8 +334,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKMODES_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_LINKMODES_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 180c194fab07..8eca55122ef3 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -18,7 +18,7 @@ static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { }; /** - * ethnl_parse_header() - parse request header + * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into * @header: nest attribute with request header * @net: request netns @@ -33,9 +33,9 @@ static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { * * Return: 0 on success or negative error code */ -int ethnl_parse_header(struct ethnl_req_info *req_info, - const struct nlattr *header, struct net *net, - struct netlink_ext_ack *extack, bool require_dev) +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *header, struct net *net, + struct netlink_ext_ack *extack, bool require_dev) { struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1]; const struct nlattr *devname_attr; @@ -253,8 +253,8 @@ static int ethnl_default_parse(struct ethnl_req_info *req_info, request_ops->request_policy, extack); if (ret < 0) goto out; - ret = ethnl_parse_header(req_info, tb[request_ops->hdr_attr], net, - extack, require_dev); + ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr], + net, extack, require_dev); if (ret < 0) goto out; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 60efd87686ad..961708290074 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -10,9 +10,10 @@ struct 
ethnl_req_info; -int ethnl_parse_header(struct ethnl_req_info *req_info, - const struct nlattr *nest, struct net *net, - struct netlink_ext_ack *extack, bool require_dev); +int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, + const struct nlattr *nest, struct net *net, + struct netlink_ext_ack *extack, + bool require_dev); int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, u16 attrtype); struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index e1b8a65b64c4..1d2bcabee554 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -123,8 +123,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info) wol_set_policy, info->extack); if (ret < 0) return ret; - ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_WOL_HEADER], - genl_info_net(info), info->extack, true); + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_WOL_HEADER], + genl_info_net(info), info->extack, + true); if (ret < 0) return ret; dev = req_info.dev; -- cgit v1.2.3 From f70bb06563ed07e4ba064f2785dba0bef96cd449 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:43 +0100 Subject: ethtool: update mapping of features to legacy ioctl requests Legacy ioctl request like ETHTOOL_GTXCSUM are still used by ethtool utility to get values of legacy flags (which rather work as feature groups). These are calculated from values of actual features and request to set them is implemented as an attempt to set all features mapping to them but there are two inconsistencies: - tx-checksum-fcoe-crc is shown under tx-checksumming but NETIF_F_FCOE_CRC is not included in ETHTOOL_GTXCSUM/ETHTOOL_STXCSUM - tx-scatter-gather-fraglist is shown under scatter-gather but NETIF_F_FRAGLIST is not included in ETHTOOL_GSG/ETHTOOL_SSG As the mapping in ethtool output is more correct from logical point of view, fix ethtool_get_feature_mask() to match it. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b2684ffa26de..ae97c82c7052 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -198,13 +198,14 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_FCOE_CRC_BIT | + NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; case ETHTOOL_GSG: case ETHTOOL_SSG: - return NETIF_F_SG; + return NETIF_F_SG | NETIF_F_FRAGLIST; case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; -- cgit v1.2.3 From 0524399d4612f5af38b8383680dde4df4bc4eea2 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:48 +0100 Subject: ethtool: provide netdev features with FEATURES_GET request Implement FEATURES_GET request to get network device features. These are traditionally available via ETHTOOL_GFEATURES ioctl request. v2: - style cleanup suggested by Jakub Kicinski Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- Documentation/networking/ethtool-netlink.rst | 51 +++++++++-- include/uapi/linux/ethtool_netlink.h | 17 ++++ net/ethtool/Makefile | 2 +- net/ethtool/common.h | 2 + net/ethtool/features.c | 131 +++++++++++++++++++++++++++ net/ethtool/ioctl.c | 2 - net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + 8 files changed, 202 insertions(+), 12 deletions(-) create mode 100644 net/ethtool/features.c (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index f1f868479ceb..5713abf98534 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -189,6 +189,7 @@ Userspace to kernel: ``ETHTOOL_MSG_DEBUG_SET`` set debugging settings ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings + ``ETHTOOL_MSG_FEATURES_GET`` get device features ===================================== ================================ Kernel to userspace: @@ -204,6 +205,7 @@ Kernel to userspace: ``ETHTOOL_MSG_DEBUG_NTF`` debugging settings notification ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification + ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -521,6 +523,37 @@ Request contents: ``WAKE_MAGICSECURE`` mode. +FEATURES_GET +============ + +Gets netdev features like ``ETHTOOL_GFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_HW`` bitset dev->hw_features + ``ETHTOOL_A_FEATURES_WANTED`` bitset dev->wanted_features + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset dev->features + ``ETHTOOL_A_FEATURES_NOCHANGE`` bitset NETIF_F_NEVER_CHANGE + ==================================== ====== ========================== + +Bitmaps in kernel response have the same meaning as bitmaps used in ioctl +interference but attribute names are different (they are based on +corresponding members of struct net_device). Legacy "flags" are not provided, +if userspace needs them (most likely only ethtool for backward compatibility), +it can calculate their values from related feature bits itself. +ETHA_FEATURES_HW uses mask consisting of all features recognized by kernel (to +provide all names when using verbose bitmap format), the other three use no +mask (simple bit lists). + + Request translation =================== @@ -551,30 +584,30 @@ have their netlink replacement yet. 
``ETHTOOL_SRINGPARAM`` n/a ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a - ``ETHTOOL_GRXCSUM`` n/a + ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SRXCSUM`` n/a - ``ETHTOOL_GTXCSUM`` n/a + ``ETHTOOL_GTXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_STXCSUM`` n/a - ``ETHTOOL_GSG`` n/a + ``ETHTOOL_GSG`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SSG`` n/a ``ETHTOOL_TEST`` n/a ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_PHYS_ID`` n/a ``ETHTOOL_GSTATS`` n/a - ``ETHTOOL_GTSO`` n/a + ``ETHTOOL_GTSO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_STSO`` n/a ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK`` - ``ETHTOOL_GUFO`` n/a + ``ETHTOOL_GUFO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SUFO`` n/a - ``ETHTOOL_GGSO`` n/a + ``ETHTOOL_GGSO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SGSO`` n/a - ``ETHTOOL_GFLAGS`` n/a + ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` n/a ``ETHTOOL_GPFLAGS`` n/a ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a - ``ETHTOOL_GGRO`` n/a + ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SGRO`` n/a ``ETHTOOL_GRXRINGS`` n/a ``ETHTOOL_GRXCLSRLCNT`` n/a @@ -589,7 +622,7 @@ have their netlink replacement yet. ``ETHTOOL_GSSET_INFO`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_GRXFHINDIR`` n/a ``ETHTOOL_SRXFHINDIR`` n/a - ``ETHTOOL_GFEATURES`` n/a + ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` n/a ``ETHTOOL_GCHANNELS`` n/a ``ETHTOOL_SCHANNELS`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7e0b460f872c..d0cc7a0334c8 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -24,6 +24,7 @@ enum { ETHTOOL_MSG_DEBUG_SET, ETHTOOL_MSG_WOL_GET, ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -43,6 +44,7 @@ enum { ETHTOOL_MSG_DEBUG_NTF, ETHTOOL_MSG_WOL_GET_REPLY, ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -228,6 +230,21 @@ enum { ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 424545a4aaec..5be8c9ab26d1 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o + linkstate.o debug.o wol.o features.o diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 40ba74e0b9bb..7dc1163800a7 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -6,6 +6,8 @@ #include #include +#define ETHTOOL_DEV_FEATURE_WORDS DIV_ROUND_UP(NETDEV_FEATURE_COUNT, 32) + /* compose link mode index from speed, type and duplex */ #define ETHTOOL_LINK_MODE(speed, type, duplex) \ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT diff --git a/net/ethtool/features.c b/net/ethtool/features.c new file mode 100644 index 000000000000..a0cc2b969053 --- 
/dev/null +++ b/net/ethtool/features.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct features_req_info { + struct ethnl_req_info base; +}; + +struct features_reply_data { + struct ethnl_reply_data base; + u32 hw[ETHTOOL_DEV_FEATURE_WORDS]; + u32 wanted[ETHTOOL_DEV_FEATURE_WORDS]; + u32 active[ETHTOOL_DEV_FEATURE_WORDS]; + u32 nochange[ETHTOOL_DEV_FEATURE_WORDS]; + u32 all[ETHTOOL_DEV_FEATURE_WORDS]; +}; + +#define FEATURES_REPDATA(__reply_base) \ + container_of(__reply_base, struct features_reply_data, base) + +static const struct nla_policy +features_get_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src) +{ + unsigned int i; + + for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; i++) + dest[i] = src >> (32 * i); +} + +static int features_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct features_reply_data *data = FEATURES_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + netdev_features_t all_features; + + ethnl_features_to_bitmap32(data->hw, dev->hw_features); + ethnl_features_to_bitmap32(data->wanted, dev->wanted_features); + ethnl_features_to_bitmap32(data->active, dev->features); + ethnl_features_to_bitmap32(data->nochange, NETIF_F_NEVER_CHANGE); + all_features = GENMASK_ULL(NETDEV_FEATURE_COUNT - 1, 0); + ethnl_features_to_bitmap32(data->all, all_features); + + return 0; +} + +static int features_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + unsigned int len = 0; + int ret; + + ret = ethnl_bitset32_size(data->hw, data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->wanted, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->active, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + + return len; +} + +static int features_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_HW, data->hw, + data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_WANTED, data->wanted, + NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_ACTIVE, data->active, + NULL, NETDEV_FEATURE_COUNT, + 
netdev_features_strings, compact); + if (ret < 0) + return ret; + return ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_NOCHANGE, + data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); +} + +const struct ethnl_request_ops ethnl_features_request_ops = { + .request_cmd = ETHTOOL_MSG_FEATURES_GET, + .reply_cmd = ETHTOOL_MSG_FEATURES_GET_REPLY, + .hdr_attr = ETHTOOL_A_FEATURES_HEADER, + .max_attr = ETHTOOL_A_FEATURES_MAX, + .req_info_size = sizeof(struct features_req_info), + .reply_data_size = sizeof(struct features_reply_data), + .request_policy = features_get_policy, + + .prepare_data = features_prepare_data, + .reply_size = features_reply_size, + .fill_reply = features_fill_reply, +}; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index ae97c82c7052..45d1bf1764b7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -56,8 +56,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info); /* Handlers for each ethtool command */ -#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) - static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 8eca55122ef3..e451a75e9577 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -215,6 +215,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_LINKSTATE_GET] = ðnl_linkstate_request_ops, [ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -695,6 +696,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_wol, }, + { + .cmd = ETHTOOL_MSG_FEATURES_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 961708290074..be2325ea8493 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -337,6 +337,7 @@ extern const struct ethnl_request_ops ethnl_linkmodes_request_ops; extern const struct ethnl_request_ops ethnl_linkstate_request_ops; extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; +extern const struct ethnl_request_ops ethnl_features_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From 88db6d1e4f6222d22c1c4b4d4d7166cfa9d2fe0e Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:53 +0100 Subject: ethtool: add ethnl_parse_bitset() helper Unlike other SET type commands, modifying netdev features is required to provide a reply telling userspace what was actually changed, compared to what was requested. For that purpose, the "modified" flag provided by ethnl_update_bitset() is not sufficient, we need full information which bits were requested to change. Therefore provide ethnl_parse_bitset() returning effective value and mask bitmaps equivalent to the contents of a bitset nested attribute. v2: use non-atomic __set_bit() (suggested by David Miller) Signed-off-by: Michal Kubecek Signed-off-by: David S. 
Miller --- net/ethtool/bitset.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ethtool/bitset.h | 4 +++ 2 files changed, 98 insertions(+) (limited to 'net') diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index ef9197541cb3..dae7402eaca3 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -588,6 +588,100 @@ int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, return 0; } +/** + * ethnl_parse_bitset() - Compute effective value and mask from bitset nest + * @val: unsigned long based bitmap to put value into + * @mask: unsigned long based bitmap to put mask into + * @nbits: size of @val and @mask bitmaps + * @attr: nest attribute to parse and apply + * @names: array of bit names; may be null for compact format + * @extack: extack for error reporting + * + * Provide @nbits size long bitmaps for value and mask so that + * x = (val & mask) | (x & ~mask) would modify any @nbits sized bitmap x + * the same way ethnl_update_bitset() with the same bitset attribute would. + * + * Return: negative error code on failure, 0 on success + */ +int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, + unsigned int nbits, const struct nlattr *attr, + ethnl_string_array_t names, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1]; + const struct nlattr *bit_attr; + bool no_mask; + int rem; + int ret; + + if (!attr) + return 0; + ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, attr, bitset_policy, + extack); + if (ret < 0) + return ret; + no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; + + if (!tb[ETHTOOL_A_BITSET_BITS]) { + unsigned int change_bits; + + ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); + if (ret < 0) + return ret; + + change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), + change_bits); + if (change_bits < nbits) + bitmap_clear(val, change_bits, nbits - change_bits); + if (no_mask) { + bitmap_fill(mask, nbits); + } else { + bitmap_from_arr32(mask, + nla_data(tb[ETHTOOL_A_BITSET_MASK]), + change_bits); + if (change_bits < nbits) + bitmap_clear(mask, change_bits, + nbits - change_bits); + } + + return 0; + } + + if (tb[ETHTOOL_A_BITSET_VALUE]) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], + "value only allowed in compact bitset"); + return -EINVAL; + } + if (tb[ETHTOOL_A_BITSET_MASK]) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], + "mask only allowed in compact bitset"); + return -EINVAL; + } + + bitmap_zero(val, nbits); + if (no_mask) + bitmap_fill(mask, nbits); + else + bitmap_zero(mask, nbits); + + nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { + unsigned int idx; + bool bit_val; + + ret = ethnl_parse_bit(&idx, &bit_val, nbits, bit_attr, no_mask, + names, extack); + if (ret < 0) + return ret; + if (bit_val) + __set_bit(idx, val); + if (!no_mask) + __set_bit(idx, mask); + } + + return 0; +} + #if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) /* 64-bit big endian architectures are the only case when u32 based bitmaps diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h index b849f9d19676..c2c2e0051d00 100644 --- a/net/ethtool/bitset.h +++ b/net/ethtool/bitset.h @@ -26,5 +26,9 @@ int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod); +int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, + unsigned int nbits, const struct nlattr 
*attr, + ethnl_string_array_t names, + struct netlink_ext_ack *extack); #endif /* _NET_ETHTOOL_BITSET_H */ -- cgit v1.2.3 From 0980bfcd6954f124e40a000b85335c197764de14 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:58 +0100 Subject: ethtool: set netdev features with FEATURES_SET request Implement FEATURES_SET netlink request to set network device features. These are traditionally set using ETHTOOL_SFEATURES ioctl request. Actual change is subject to netdev_change_features() sanity checks so that it can differ from what was requested. Unlike with most other SET requests, in addition to error code and optional extack, kernel provides an optional reply message (ETHTOOL_MSG_FEATURES_SET_REPLY) in the same format but with different semantics: information about difference between user request and actual result and difference between old and new state of dev->features. This reply message can be suppressed by setting ETHTOOL_FLAG_OMIT_REPLY flag in request header. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 56 +++++++-- include/uapi/linux/ethtool_netlink.h | 2 + net/ethtool/features.c | 169 +++++++++++++++++++++++++++ net/ethtool/netlink.c | 5 + net/ethtool/netlink.h | 1 + 5 files changed, 224 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 5713abf98534..d6706c4aa972 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -190,6 +190,7 @@ Userspace to kernel: ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings ``ETHTOOL_MSG_FEATURES_GET`` get device features + ``ETHTOOL_MSG_FEATURES_SET`` set device features ===================================== ================================ Kernel to userspace: @@ -206,6 +207,7 @@ Kernel to userspace: ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features + ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -554,6 +556,42 @@ provide all names when using verbose bitmap format), the other three use no mask (simple bit lists). +FEATURES_SET +============ + +Request to set netdev features like ``ETHTOOL_SFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ``ETHTOOL_A_FEATURES_WANTED`` bitset requested features + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_WANTED`` bitset diff wanted vs. result + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset diff old vs. new active + ==================================== ====== ========================== + +Request constains only one bitset which can be either value/mask pair (request +to change specific feature bits and leave the rest) or only a value (request +to set all features to specified set). 
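
As a minimal sketch of the value/mask semantics described above (not taken from the patch; apply_wanted_bitset() and its parameter names are illustrative, only the <linux/bitmap.h> helpers and NETDEV_FEATURE_COUNT are real kernel API):

	#include <linux/bitmap.h>
	#include <linux/netdev_features.h>

	/* Combine a value/mask request with the current feature state: bits
	 * covered by the mask take the requested value, all other bits keep
	 * their current state.  A request carrying only a value behaves as if
	 * the mask were all ones, i.e. it replaces the whole feature set.
	 */
	static void apply_wanted_bitset(unsigned long *wanted,	  /* in: value, out: result */
					const unsigned long *old, /* current features */
					const unsigned long *mask)
	{
		DECLARE_BITMAP(keep, NETDEV_FEATURE_COUNT);

		/* wanted = (wanted & mask) | (old & ~mask) */
		bitmap_and(wanted, wanted, mask, NETDEV_FEATURE_COUNT);
		bitmap_andnot(keep, old, mask, NETDEV_FEATURE_COUNT);
		bitmap_or(wanted, wanted, keep, NETDEV_FEATURE_COUNT);
	}
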
+ +As request is subject to netdev_change_features() sanity checks, optional +kernel reply (can be suppressed by ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request +header) informs client about the actual result. ``ETHTOOL_A_FEATURES_WANTED`` +reports the difference between client request and actual result: mask consists +of bits which differ between requested features and result (dev->features +after the operation), value consists of values of these bits in the request +(i.e. negated values from resulting features). ``ETHTOOL_A_FEATURES_ACTIVE`` +reports the difference between old and new dev->features: mask consists of +bits which have changed, values are their values in new dev->features (after +the operation). + + Request translation =================== @@ -585,30 +623,30 @@ have their netlink replacement yet. ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SRXCSUM`` n/a + ``ETHTOOL_SRXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GTXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_STXCSUM`` n/a + ``ETHTOOL_STXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GSG`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SSG`` n/a + ``ETHTOOL_SSG`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_TEST`` n/a ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_PHYS_ID`` n/a ``ETHTOOL_GSTATS`` n/a ``ETHTOOL_GTSO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_STSO`` n/a + ``ETHTOOL_STSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK`` ``ETHTOOL_GUFO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SUFO`` n/a + ``ETHTOOL_SUFO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GGSO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SGSO`` n/a + ``ETHTOOL_SGSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SFLAGS`` n/a + ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPFLAGS`` n/a ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SGRO`` n/a + ``ETHTOOL_SGRO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GRXRINGS`` n/a ``ETHTOOL_GRXCLSRLCNT`` n/a ``ETHTOOL_GRXCLSRULE`` n/a @@ -623,7 +661,7 @@ have their netlink replacement yet. 
``ETHTOOL_GRXFHINDIR`` n/a ``ETHTOOL_SRXFHINDIR`` n/a ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SFEATURES`` n/a + ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GCHANNELS`` n/a ``ETHTOOL_SCHANNELS`` n/a ``ETHTOOL_SET_DUMP`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d0cc7a0334c8..6f7aaa6b7f42 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -25,6 +25,7 @@ enum { ETHTOOL_MSG_WOL_GET, ETHTOOL_MSG_WOL_SET, ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -45,6 +46,7 @@ enum { ETHTOOL_MSG_WOL_GET_REPLY, ETHTOOL_MSG_WOL_NTF, ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/features.c b/net/ethtool/features.c index a0cc2b969053..4ac1e05684ce 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -129,3 +129,172 @@ const struct ethnl_request_ops ethnl_features_request_ops = { .reply_size = features_reply_size, .fill_reply = features_fill_reply, }; + +/* FEATURES_SET */ + +static const struct nla_policy +features_set_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val) +{ + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + unsigned int i; + + bitmap_zero(dest, NETDEV_FEATURE_COUNT); + for (i = 0; i < words; i++) + dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG)); +} + +static netdev_features_t ethnl_bitmap_to_features(unsigned long *src) +{ + const unsigned int nft_bits = sizeof(netdev_features_t) * BITS_PER_BYTE; + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + netdev_features_t ret = 0; + unsigned int i; + + for (i = 0; i < words; i++) + ret |= (netdev_features_t)(src[i]) << (i * BITS_PER_LONG); + ret &= ~(netdev_features_t)0 >> (nft_bits - NETDEV_FEATURE_COUNT); + return ret; +} + +static int features_send_reply(struct net_device *dev, struct genl_info *info, + const unsigned long *wanted, + const unsigned long *wanted_mask, + const unsigned long *active, + const unsigned long *active_mask, bool compact) +{ + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + reply_len = ethnl_reply_header_size(); + ret = ethnl_bitset_size(wanted, wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + ret = ethnl_bitset_size(active, active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + + ret = -ENOMEM; + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_A_FEATURES_HEADER, info, + &reply_payload); + if (!rskb) + goto err; + + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_WANTED, wanted, + wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto nla_put_failure; + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_ACTIVE, active, + active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto 
nla_put_failure; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + return ret; + +nla_put_failure: + nlmsg_free(rskb); + WARN_ONCE(1, "calculated message payload length (%d) not sufficient\n", + reply_len); +err: + GENL_SET_ERR_MSG(info, "failed to send reply message"); + return ret; +} + +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) +{ + DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); + struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; + struct ethnl_req_info req_info = {}; + struct net_device *dev; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_FEATURES_MAX, features_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_FEATURES_WANTED]) + return -EINVAL; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_FEATURES_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ethnl_features_to_bitmap(old_active, dev->features); + ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, + tb[ETHTOOL_A_FEATURES_WANTED], + netdev_features_strings, info->extack); + if (ret < 0) + goto out_rtnl; + if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) { + GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features"); + ret = -EINVAL; + goto out_rtnl; + } + + /* set req_wanted bits not in req_mask from old_active */ + bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); + bitmap_andnot(new_active, old_active, req_mask, NETDEV_FEATURE_COUNT); + bitmap_or(req_wanted, new_active, req_wanted, NETDEV_FEATURE_COUNT); + if (bitmap_equal(req_wanted, old_active, NETDEV_FEATURE_COUNT)) { + ret = 0; + goto out_rtnl; + } + + dev->wanted_features = ethnl_bitmap_to_features(req_wanted); + __netdev_update_features(dev); + ethnl_features_to_bitmap(new_active, dev->features); + + ret = 0; + if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { + bool compact = req_info.flags & ETHTOOL_FLAG_COMPACT_BITSETS; + + bitmap_xor(wanted_diff_mask, req_wanted, new_active, + NETDEV_FEATURE_COUNT); + bitmap_xor(active_diff_mask, old_active, new_active, + NETDEV_FEATURE_COUNT); + bitmap_and(wanted_diff_mask, wanted_diff_mask, req_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(req_wanted, req_wanted, wanted_diff_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(new_active, new_active, active_diff_mask, + NETDEV_FEATURE_COUNT); + + ret = features_send_reply(dev, info, req_wanted, + wanted_diff_mask, new_active, + active_diff_mask, compact); + } + +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e451a75e9577..757ea3fc98a0 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -703,6 +703,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_FEATURES_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_features, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index be2325ea8493..135836201e89 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -343,5 +343,6 @@ int ethnl_set_linkinfo(struct 
sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ -- cgit v1.2.3 From 9c6451ef4881c2f528c625766e8bb9af5e40941a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:03 +0100 Subject: ethtool: add FEATURES_NTF notification Send ETHTOOL_MSG_FEATURES_NTF notification whenever network device features are modified using ETHTOOL_MSG_FEATURES_SET netlink message, ethtool ioctl request or any other way resulting in call to netdev_update_features() or netdev_change_features() Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 6 ++++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/features.c | 4 ++++ net/ethtool/netlink.c | 29 +++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d6706c4aa972..47542f042e9d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -208,6 +208,7 @@ Kernel to userspace: ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET + ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -591,6 +592,11 @@ reports the difference between old and new dev->features: mask consists of bits which have changed, values are their values in new dev->features (after the operation). +``ETHTOOL_MSG_FEATURES_NTF`` notification is sent not only if device features +are modified using ``ETHTOOL_MSG_FEATURES_SET`` request or on of ethtool ioctl +request but also each time features are modified with netdev_update_features() +or netdev_change_features(). 
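
To make the reply semantics above concrete, both reported bitsets follow from plain bitmap algebra. A sketch mirroring the operations in ethnl_set_features() shown earlier; the function and parameter names are illustrative, only the bitmap helpers and NETDEV_FEATURE_COUNT are real:

	/* Compute the two bitsets carried in ETHTOOL_MSG_FEATURES_SET_REPLY. */
	static void features_reply_diffs(const unsigned long *req_wanted,
					 const unsigned long *req_mask,
					 const unsigned long *old_active,
					 const unsigned long *new_active,
					 unsigned long *wanted_val,
					 unsigned long *wanted_mask,
					 unsigned long *active_val,
					 unsigned long *active_mask)
	{
		/* ETHTOOL_A_FEATURES_WANTED: bits the client asked to change
		 * which ended up different from the result, carrying the
		 * values the client requested for them.
		 */
		bitmap_xor(wanted_mask, req_wanted, new_active, NETDEV_FEATURE_COUNT);
		bitmap_and(wanted_mask, wanted_mask, req_mask, NETDEV_FEATURE_COUNT);
		bitmap_and(wanted_val, req_wanted, wanted_mask, NETDEV_FEATURE_COUNT);

		/* ETHTOOL_A_FEATURES_ACTIVE: bits of dev->features that
		 * actually changed, carrying their new values.
		 */
		bitmap_xor(active_mask, old_active, new_active, NETDEV_FEATURE_COUNT);
		bitmap_and(active_val, new_active, active_mask, NETDEV_FEATURE_COUNT);
	}
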
+ Request translation =================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 6f7aaa6b7f42..3d0204cf96a6 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -47,6 +47,7 @@ enum { ETHTOOL_MSG_WOL_NTF, ETHTOOL_MSG_FEATURES_GET_REPLY, ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 4ac1e05684ce..4e632dc987d8 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -230,6 +230,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; struct ethnl_req_info req_info = {}; struct net_device *dev; + bool mod; int ret; ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, @@ -272,6 +273,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev->wanted_features = ethnl_bitmap_to_features(req_wanted); __netdev_update_features(dev); ethnl_features_to_bitmap(new_active, dev->features); + mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); ret = 0; if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { @@ -292,6 +294,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) wanted_diff_mask, new_active, active_diff_mask, compact); } + if (mod) + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); out_rtnl: rtnl_unlock(); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 757ea3fc98a0..5c0e361bfd66 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -528,6 +528,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_LINKMODES_NTF] = ðnl_linkmodes_request_ops, [ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, }; /* default notification handler */ @@ -613,6 +614,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -630,6 +632,29 @@ void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) } EXPORT_SYMBOL(ethtool_notify); +static void ethnl_notify_features(struct netdev_notifier_info *info) +{ + struct net_device *dev = netdev_notifier_info_to_dev(info); + + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); +} + +static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + switch (event) { + case NETDEV_FEAT_CHANGE: + ethnl_notify_features(ptr); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block ethnl_netdev_notifier = { + .notifier_call = ethnl_netdev_event, +}; + /* genetlink setup */ static const struct genl_ops ethtool_genl_ops[] = { @@ -736,7 +761,9 @@ static int __init ethnl_init(void) return ret; ethnl_ok = true; - return 0; + ret = register_netdevice_notifier(ðnl_netdev_notifier); + WARN(ret < 0, "ethtool: net device notifier registration failed"); + return ret; } subsys_initcall(ethnl_init); -- cgit v1.2.3 From e16c3386fc4d9611b6a2abaa639318c7ae793370 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:08 +0100 Subject: ethtool: provide private flags with PRIVFLAGS_GET request Implement PRIVFLAGS_GET 
request to get private flags for a network device. These are traditionally available via ETHTOOL_GPFLAGS ioctl request. Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++- include/uapi/linux/ethtool_netlink.h | 14 +++ net/ethtool/Makefile | 2 +- net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + net/ethtool/privflags.c | 136 +++++++++++++++++++++++++++ 6 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/privflags.c (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 47542f042e9d..7bba4c940ef7 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -191,6 +191,7 @@ Userspace to kernel: ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings ``ETHTOOL_MSG_FEATURES_GET`` get device features ``ETHTOOL_MSG_FEATURES_SET`` set device features + ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ===================================== ================================ Kernel to userspace: @@ -209,6 +210,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification + ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -598,6 +600,32 @@ request but also each time features are modified with netdev_update_features() or netdev_change_features(). +PRIVFLAGS_GET +============= + +Gets private flags like ``ETHTOOL_GPFLAGS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested reply header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` is a bitset with values of device private flags. +These flags are defined by driver, their number and names (and also meaning) +are device dependent. For compact bitset format, names can be retrieved as +``ETH_SS_PRIV_FLAGS`` string set. If verbose bitset format is requested, +response uses all private flags supported by the device as mask so that client +gets the full information without having to fetch the string set with names. + + Request translation =================== @@ -647,7 +675,7 @@ have their netlink replacement yet. 
``ETHTOOL_SGSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` - ``ETHTOOL_GPFLAGS`` n/a + ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3d0204cf96a6..d94bbf5e4d1c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -26,6 +26,7 @@ enum { ETHTOOL_MSG_WOL_SET, ETHTOOL_MSG_FEATURES_GET, ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -48,6 +49,7 @@ enum { ETHTOOL_MSG_FEATURES_GET_REPLY, ETHTOOL_MSG_FEATURES_SET_REPLY, ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -248,6 +250,18 @@ enum { ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 }; +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 5be8c9ab26d1..58708bcc2968 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o + linkstate.o debug.o wol.o features.o privflags.o diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5c0e361bfd66..9cbb1d8b4d23 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -216,6 +216,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -733,6 +734,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_features, }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 135836201e89..36cf077c3085 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -338,6 +338,7 @@ extern const struct ethnl_request_ops ethnl_linkstate_request_ops; extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; +extern const struct ethnl_request_ops ethnl_privflags_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c new file mode 100644 index 000000000000..169dd4a832f6 --- /dev/null +++ b/net/ethtool/privflags.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include 
"common.h" +#include "bitset.h" + +struct privflags_req_info { + struct ethnl_req_info base; +}; + +struct privflags_reply_data { + struct ethnl_reply_data base; + const char (*priv_flag_names)[ETH_GSTRING_LEN]; + unsigned int n_priv_flags; + u32 priv_flags; +}; + +#define PRIVFLAGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct privflags_reply_data, base) + +static const struct nla_policy +privflags_get_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_REJECT }, +}; + +static int ethnl_get_priv_flags_info(struct net_device *dev, + unsigned int *count, + const char (**names)[ETH_GSTRING_LEN]) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int nflags; + + nflags = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (nflags < 0) + return nflags; + + if (names) { + *names = kcalloc(nflags, ETH_GSTRING_LEN, GFP_KERNEL); + if (!*names) + return -ENOMEM; + ops->get_strings(dev, ETH_SS_PRIV_FLAGS, (u8 *)*names); + } + + /* We can pass more than 32 private flags to userspace via netlink but + * we cannot get more with ethtool_ops::get_priv_flags(). Note that we + * must not adjust nflags before allocating the space for flag names + * as the buffer must be large enough for all flags. + */ + if (WARN_ONCE(nflags > 32, + "device %s reports more than 32 private flags (%d)\n", + netdev_name(dev), nflags)) + nflags = 32; + *count = nflags; + + return 0; +} + +static int privflags_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const char (*names)[ETH_GSTRING_LEN]; + const struct ethtool_ops *ops; + unsigned int nflags; + int ret; + + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = ethnl_get_priv_flags_info(dev, &nflags, &names); + if (ret < 0) + goto out_ops; + data->priv_flags = ops->get_priv_flags(dev); + data->priv_flag_names = names; + data->n_priv_flags = nflags; + +out_ops: + ethnl_ops_complete(dev); + return ret; +} + +static int privflags_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_bitset32_size(&data->priv_flags, &all_flags, + data->n_priv_flags, + data->priv_flag_names, compact); +} + +static int privflags_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_put_bitset32(skb, ETHTOOL_A_PRIVFLAGS_FLAGS, + &data->priv_flags, &all_flags, + data->n_priv_flags, data->priv_flag_names, + compact); +} + +static void privflags_cleanup_data(struct ethnl_reply_data *reply_data) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_data); + + kfree(data->priv_flag_names); +} + +const struct ethnl_request_ops ethnl_privflags_request_ops = { + .request_cmd = 
ETHTOOL_MSG_PRIVFLAGS_GET, + .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER, + .max_attr = ETHTOOL_A_PRIVFLAGS_MAX, + .req_info_size = sizeof(struct privflags_req_info), + .reply_data_size = sizeof(struct privflags_reply_data), + .request_policy = privflags_get_policy, + + .prepare_data = privflags_prepare_data, + .reply_size = privflags_reply_size, + .fill_reply = privflags_fill_reply, + .cleanup_data = privflags_cleanup_data, +}; -- cgit v1.2.3 From f265d799596aff80fe19b0b3896a4abe13cdb534 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:13 +0100 Subject: ethtool: set device private flags with PRIVFLAGS_SET request Implement PRIVFLAGS_SET netlink request to set private flags of a network device. These are traditionally set with ETHTOOL_SPFLAGS ioctl request. Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 21 ++++++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + net/ethtool/privflags.c | 70 ++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7bba4c940ef7..e553112fa2d7 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -192,6 +192,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEATURES_GET`` get device features ``ETHTOOL_MSG_FEATURES_SET`` set device features ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags + ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ===================================== ================================ Kernel to userspace: @@ -211,6 +212,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags + ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -626,6 +628,23 @@ response uses all private flags supported by the device as mask so that client gets the full information without having to fetch the string set with names. +PRIVFLAGS_SET +============= + +Sets or modifies values of device private flags like ``ETHTOOL_SPFLAGS`` +ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` can either set the whole set of private flags or +modify only values of some of them. + + Request translation =================== @@ -676,7 +695,7 @@ have their netlink replacement yet. 
``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` - ``ETHTOOL_SPFLAGS`` n/a + ``ETHTOOL_SPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_SET`` ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d94bbf5e4d1c..13e631c86825 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -27,6 +27,7 @@ enum { ETHTOOL_MSG_FEATURES_GET, ETHTOOL_MSG_FEATURES_SET, ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 9cbb1d8b4d23..b6795aad7ccb 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -741,6 +741,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_privflags, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 36cf077c3085..ca789f587ef3 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -345,5 +345,6 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index 169dd4a832f6..dfa76d552277 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -134,3 +134,73 @@ const struct ethnl_request_ops ethnl_privflags_request_ops = { .fill_reply = privflags_fill_reply, .cleanup_data = privflags_cleanup_data, }; + +/* PRIVFLAGS_SET */ + +static const struct nla_policy +privflags_set_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED }, +}; + +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_PRIVFLAGS_MAX + 1]; + const char (*names)[ETH_GSTRING_LEN] = NULL; + struct ethnl_req_info req_info = {}; + const struct ethtool_ops *ops; + struct net_device *dev; + unsigned int nflags; + bool mod = false; + bool compact; + u32 flags; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_PRIVFLAGS_MAX, privflags_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_PRIVFLAGS_FLAGS]) + return -EINVAL; + ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_PRIVFLAGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->set_priv_flags || + !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? 
NULL : &names); + if (ret < 0) + goto out_ops; + flags = ops->get_priv_flags(dev); + + ret = ethnl_update_bitset32(&flags, nflags, + tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names, + info->extack, &mod); + if (ret < 0 || !mod) + goto out_free; + ret = ops->set_priv_flags(dev, flags); + +out_free: + kfree(names); +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} -- cgit v1.2.3 From 111dcba3c694b4ca7dc8b6abfd575745dd51be3d Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:18 +0100 Subject: ethtool: add PRIVFLAGS_NTF notification Send ETHTOOL_MSG_PRIVFLAGS_NTF notification whenever private flags of a network device are modified using ETHTOOL_MSG_PRIVFLAGS_SET netlink message or ETHTOOL_SPFLAGS ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/ioctl.c | 2 ++ net/ethtool/netlink.c | 2 ++ net/ethtool/privflags.c | 3 +++ 4 files changed, 8 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 13e631c86825..7f23a7f0fca1 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -51,6 +51,7 @@ enum { ETHTOOL_MSG_FEATURES_SET_REPLY, ETHTOOL_MSG_FEATURES_NTF, ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 45d1bf1764b7..298822289496 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2716,6 +2716,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, dev->ethtool_ops->get_priv_flags); + if (!rc) + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); break; case ETHTOOL_SPFLAGS: rc = ethtool_set_value(dev, useraddr, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b6795aad7ccb..dec82b0b80bd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -530,6 +530,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, }; /* default notification handler */ @@ -616,6 +617,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index dfa76d552277..e8f03b33db9b 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -194,6 +194,9 @@ int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) if (ret < 0 || !mod) goto out_free; ret = ops->set_priv_flags(dev, flags); + if (ret < 0) + goto out_free; + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); out_free: kfree(names); -- cgit v1.2.3 From e4a1717b677c5cb285e9b425c569e261084a484c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:23 +0100 Subject: ethtool: provide ring sizes with RINGS_GET request Implement RINGS_GET request to get ring sizes of a network device. These are traditionally available via ETHTOOL_GRINGPARAM ioctl request. 
Omit attributes for ring types which are not supported by driver or device (zero reported for maximum). v2: (all suggested by Jakub Kicinski) - minor cleanup in rings_prepare_data() - more descriptive rings_reply_size() - omit attributes with zero max size Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++++- include/uapi/linux/ethtool_netlink.h | 21 ++++++ net/ethtool/Makefile | 2 +- net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + net/ethtool/rings.c | 108 +++++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/rings.c (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index e553112fa2d7..798c2f97d89b 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -193,6 +193,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEATURES_SET`` set device features ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags + ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ===================================== ================================ Kernel to userspace: @@ -213,6 +214,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags + ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -645,6 +647,32 @@ Request contents: modify only values of some of them. +RINGS_GET +========= + +Gets ring sizes like ``ETHTOOL_GRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + + Request translation =================== @@ -671,7 +699,7 @@ have their netlink replacement yet. 
``ETHTOOL_SEEPROM`` n/a ``ETHTOOL_GCOALESCE`` n/a ``ETHTOOL_SCOALESCE`` n/a - ``ETHTOOL_GRINGPARAM`` n/a + ``ETHTOOL_GRINGPARAM`` ``ETHTOOL_MSG_RINGS_GET`` ``ETHTOOL_SRINGPARAM`` n/a ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7f23a7f0fca1..7cd220f8cf73 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -28,6 +28,7 @@ enum { ETHTOOL_MSG_FEATURES_SET, ETHTOOL_MSG_PRIVFLAGS_GET, ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -52,6 +53,7 @@ enum { ETHTOOL_MSG_FEATURES_NTF, ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -264,6 +266,25 @@ enum { ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 }; +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 58708bcc2968..b48b70ef4ed0 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o privflags.o + linkstate.o debug.o wol.o features.o privflags.o rings.o diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index dec82b0b80bd..0dc25a490450 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -217,6 +217,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -748,6 +749,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_privflags, }, + { + .cmd = ETHTOOL_MSG_RINGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ca789f587ef3..01761932ed15 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -339,6 +339,7 @@ extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; extern const struct ethnl_request_ops ethnl_privflags_request_ops; +extern const struct ethnl_request_ops ethnl_rings_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c new file mode 
100644 index 000000000000..d3129d8a252d --- /dev/null +++ b/net/ethtool/rings.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" + +struct rings_req_info { + struct ethnl_req_info base; +}; + +struct rings_reply_data { + struct ethnl_reply_data base; + struct ethtool_ringparam ringparam; +}; + +#define RINGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct rings_reply_data, base) + +static const struct nla_policy +rings_get_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_REJECT }, +}; + +static int rings_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct rings_reply_data *data = RINGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_ringparam(dev, &data->ringparam); + ethnl_ops_complete(dev); + + return 0; +} + +static int rings_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _RINGS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ + nla_total_size(sizeof(u32)); /* _RINGS_TX */ +} + +static int rings_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct rings_reply_data *data = RINGS_REPDATA(reply_base); + const struct ethtool_ringparam *ringparam = &data->ringparam; + + if ((ringparam->rx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, + ringparam->rx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX, + ringparam->rx_pending))) || + (ringparam->rx_mini_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI_MAX, + ringparam->rx_mini_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI, + ringparam->rx_mini_pending))) || + (ringparam->rx_jumbo_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ringparam->rx_jumbo_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO, + ringparam->rx_jumbo_pending))) || + (ringparam->tx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, + ringparam->tx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_TX, + ringparam->tx_pending)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_rings_request_ops = { + .request_cmd = ETHTOOL_MSG_RINGS_GET, + .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_RINGS_HEADER, + .max_attr = ETHTOOL_A_RINGS_MAX, + .req_info_size = sizeof(struct rings_req_info), + .reply_data_size = sizeof(struct rings_reply_data), + .request_policy = rings_get_policy, + + 
.prepare_data = rings_prepare_data, + .reply_size = rings_reply_size, + .fill_reply = rings_fill_reply, +}; -- cgit v1.2.3 From 2fc2929e807211a9535a6541f24b57fa1c469728 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:28 +0100 Subject: ethtool: set device ring sizes with RINGS_SET request Implement RINGS_SET netlink request to set ring sizes of a network device. These are traditionally set with ETHTOOL_SRINGPARAM ioctl request. Like the ioctl implementation, the generic ethtool code checks if supplied values do not exceed driver defined limits; if they do, first offending attribute is reported using extack. v2: - fix netdev reference leak in error path (found by Jakub Kicinsky) Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 23 ++++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + net/ethtool/rings.c | 89 ++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 798c2f97d89b..ba31ae8f1feb 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -194,6 +194,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ``ETHTOOL_MSG_RINGS_GET`` get ring sizes + ``ETHTOOL_MSG_RINGS_SET`` set ring sizes ===================================== ================================ Kernel to userspace: @@ -673,6 +674,26 @@ Kernel response contents: ==================================== ====== ========================== +RINGS_SET +========= + +Sets ring sizes like ``ETHTOOL_SRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + +Kernel checks that requested ring sizes do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + Request translation =================== @@ -700,7 +721,7 @@ have their netlink replacement yet. 
``ETHTOOL_GCOALESCE`` n/a ``ETHTOOL_SCOALESCE`` n/a ``ETHTOOL_GRINGPARAM`` ``ETHTOOL_MSG_RINGS_GET`` - ``ETHTOOL_SRINGPARAM`` n/a + ``ETHTOOL_SRINGPARAM`` ``ETHTOOL_MSG_RINGS_SET`` ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7cd220f8cf73..ae71801b7aac 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -29,6 +29,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_GET, ETHTOOL_MSG_PRIVFLAGS_SET, ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 0dc25a490450..6a1ac8897a7e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -756,6 +756,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_RINGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_rings, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 01761932ed15..b30426d01890 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,5 +347,6 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index d3129d8a252d..93f428e9a6c2 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -106,3 +106,92 @@ const struct ethnl_request_ops ethnl_rings_request_ops = { .reply_size = rings_reply_size, .fill_reply = rings_fill_reply, }; + +/* RINGS_SET */ + +static const struct nla_policy +rings_set_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, +}; + +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_RINGS_MAX + 1]; + struct ethtool_ringparam ringparam = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_RINGS_MAX, rings_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_RINGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_ringparam || !ops->set_ringparam) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_ringparam(dev, &ringparam); + + ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); 
+ ethnl_update_u32(&ringparam.rx_mini_pending, + tb[ETHTOOL_A_RINGS_RX_MINI], &mod); + ethnl_update_u32(&ringparam.rx_jumbo_pending, + tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); + ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new ring parameters are within limits */ + if (ringparam.rx_pending > ringparam.rx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX]; + else if (ringparam.rx_mini_pending > ringparam.rx_mini_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_MINI]; + else if (ringparam.rx_jumbo_pending > ringparam.rx_jumbo_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; + else if (ringparam.tx_pending > ringparam.tx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_TX]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested ring size exceeeds maximum"); + goto out_ops; + } + + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} -- cgit v1.2.3 From bc9d1c995ecb8cccaac8ead2ed570544c2c885eb Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:33 +0100 Subject: ethtool: add RINGS_NTF notification Send ETHTOOL_MSG_RINGS_NTF notification whenever ring sizes of a network device are modified using ETHTOOL_MSG_RINGS_SET netlink message or ETHTOOL_SRINGPARAM ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 1 + include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/ioctl.c | 6 +++++- net/ethtool/netlink.c | 2 ++ net/ethtool/rings.c | 3 +++ 5 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index ba31ae8f1feb..026a5fd4a08b 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -216,6 +216,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes + ``ETHTOOL_MSG_RINGS_NTF`` ring sizes ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index ae71801b7aac..abfc8fd626da 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -55,6 +55,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, ETHTOOL_MSG_PRIVFLAGS_NTF, ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 298822289496..1d5c1b6b81a4 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1635,6 +1635,7 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; + int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; @@ -1651,7 +1652,10 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) ringparam.tx_pending > max.tx_max_pending) return -EINVAL; - return dev->ethtool_ops->set_ringparam(dev, &ringparam); + ret = 
dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); + return ret; } static noinline_for_stack int ethtool_get_channels(struct net_device *dev, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6a1ac8897a7e..653e009216cd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -532,6 +532,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops, }; /* default notification handler */ @@ -619,6 +620,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 93f428e9a6c2..c2ebf72be217 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -186,6 +186,9 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) } ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); out_ops: ethnl_ops_complete(dev); -- cgit v1.2.3 From 0c84979c951a85fcb5c77ac2480fd476e283a07c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:38 +0100 Subject: ethtool: provide channel counts with CHANNELS_GET request Implement CHANNELS_GET request to get channel counts of a network device. These are traditionally available via ETHTOOL_GCHANNELS ioctl request. Omit attributes for channel types which are not supported by driver or device (zero reported for maximum). v2: (all suggested by Jakub Kicinski) - minor cleanup in channels_prepare_data() - more descriptive channels_reply_size() - omit attributes with zero max count Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++++- include/uapi/linux/ethtool_netlink.h | 21 ++++++ net/ethtool/Makefile | 3 +- net/ethtool/channels.c | 108 +++++++++++++++++++++++++++ net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + 6 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/channels.c (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 026a5fd4a08b..decbbddfd8be 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -195,6 +195,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ``ETHTOOL_MSG_RINGS_SET`` set ring sizes + ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts ===================================== ================================ Kernel to userspace: @@ -217,6 +218,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ``ETHTOOL_MSG_RINGS_NTF`` ring sizes + ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -695,6 +697,32 @@ driver. 
Driver may impose additional constraints and may not suspport all attributes. +CHANNELS_GET +============ + +Gets channel counts like ``ETHTOOL_GCHANNELS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested reply header + ``ETHTOOL_A_CHANNELS_RX_MAX`` u32 max receive channels + ``ETHTOOL_A_CHANNELS_TX_MAX`` u32 max transmit channels + ``ETHTOOL_A_CHANNELS_OTHER_MAX`` u32 max other channels + ``ETHTOOL_A_CHANNELS_COMBINED_MAX`` u32 max combined channels + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + + Request translation =================== @@ -765,7 +793,7 @@ have their netlink replacement yet. ``ETHTOOL_SRXFHINDIR`` n/a ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` - ``ETHTOOL_GCHANNELS`` n/a + ``ETHTOOL_GCHANNELS`` ``ETHTOOL_MSG_CHANNELS_GET`` ``ETHTOOL_SCHANNELS`` n/a ``ETHTOOL_SET_DUMP`` n/a ``ETHTOOL_GET_DUMP_FLAG`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index abfc8fd626da..2270eb115eca 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -30,6 +30,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_SET, ETHTOOL_MSG_RINGS_GET, ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -56,6 +57,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_NTF, ETHTOOL_MSG_RINGS_GET_REPLY, ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -287,6 +289,25 @@ enum { ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) }; +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index b48b70ef4ed0..b0bd3decad02 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,5 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o privflags.o rings.o + linkstate.o debug.o wol.o features.o privflags.o rings.o \ + channels.o diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c new file mode 100644 index 000000000000..500dd5ad250b --- /dev/null +++ b/net/ethtool/channels.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include 
"common.h" + +struct channels_req_info { + struct ethnl_req_info base; +}; + +struct channels_reply_data { + struct ethnl_reply_data base; + struct ethtool_channels channels; +}; + +#define CHANNELS_REPDATA(__reply_base) \ + container_of(__reply_base, struct channels_reply_data, base) + +static const struct nla_policy +channels_get_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_REJECT }, +}; + +static int channels_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_channels(dev, &data->channels); + ethnl_ops_complete(dev); + + return 0; +} + +static int channels_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_COMBINED_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_COUNT */ + nla_total_size(sizeof(u32)); /* _CHANNELS_COMBINED_COUNT */ +} + +static int channels_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + const struct ethtool_channels *channels = &data->channels; + + if ((channels->max_rx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_MAX, + channels->max_rx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_COUNT, + channels->rx_count))) || + (channels->max_tx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_MAX, + channels->max_tx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_COUNT, + channels->tx_count))) || + (channels->max_other && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_MAX, + channels->max_other) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_COUNT, + channels->other_count))) || + (channels->max_combined && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_MAX, + channels->max_combined) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_COUNT, + channels->combined_count)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_channels_request_ops = { + .request_cmd = ETHTOOL_MSG_CHANNELS_GET, + .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY, + .hdr_attr = ETHTOOL_A_CHANNELS_HEADER, + .max_attr = ETHTOOL_A_CHANNELS_MAX, + .req_info_size = sizeof(struct channels_req_info), + .reply_data_size = sizeof(struct channels_reply_data), + .request_policy = channels_get_policy, + + .prepare_data = channels_prepare_data, + .reply_size = channels_reply_size, + 
.fill_reply = channels_fill_reply, +}; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 653e009216cd..ac35513abeed 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -218,6 +218,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_GET] = ðnl_channels_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -763,6 +764,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_rings, }, + { + .cmd = ETHTOOL_MSG_CHANNELS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index b30426d01890..53bfe720ff1a 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -340,6 +340,7 @@ extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; extern const struct ethnl_request_ops ethnl_privflags_request_ops; extern const struct ethnl_request_ops ethnl_rings_request_ops; +extern const struct ethnl_request_ops ethnl_channels_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From e19c591eafad4d723a2eed385e253eca0506cc25 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:43 +0100 Subject: ethtool: set device channel counts with CHANNELS_SET request Implement CHANNELS_SET netlink request to set channel counts of a network device. These are traditionally set with ETHTOOL_SCHANNELS ioctl request. Like the ioctl implementation, the generic ethtool code checks if supplied values do not exceed driver defined limits; if they do, first offending attribute is reported using extack. Checks preventing removing channels used for RX indirection table or zerocopy AF_XDP socket are also implemented. Move ethtool_get_max_rxfh_channel() helper into common.c so that it can be used by both ioctl and netlink code. v2: - fix netdev reference leak in error path (found by Jakub Kicinsky) Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- Documentation/networking/ethtool-netlink.rst | 23 +++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/channels.c | 116 +++++++++++++++++++++++++++ net/ethtool/common.c | 31 +++++++ net/ethtool/common.h | 1 + net/ethtool/ioctl.c | 31 ------- net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + 8 files changed, 177 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index decbbddfd8be..7df7476cf310 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -196,6 +196,7 @@ Userspace to kernel: ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ``ETHTOOL_MSG_RINGS_SET`` set ring sizes ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts + ``ETHTOOL_MSG_CHANNELS_SET`` set channel counts ===================================== ================================ Kernel to userspace: @@ -723,6 +724,26 @@ Kernel response contents: ===================================== ====== ========================== +CHANNELS_SET +============ + +Sets channel counts like ``ETHTOOL_SCHANNELS`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + +Kernel checks that requested channel counts do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + Request translation =================== @@ -794,7 +815,7 @@ have their netlink replacement yet. 
``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GCHANNELS`` ``ETHTOOL_MSG_CHANNELS_GET`` - ``ETHTOOL_SCHANNELS`` n/a + ``ETHTOOL_SCHANNELS`` ``ETHTOOL_MSG_CHANNELS_SET`` ``ETHTOOL_SET_DUMP`` n/a ``ETHTOOL_GET_DUMP_FLAG`` n/a ``ETHTOOL_GET_DUMP_DATA`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 2270eb115eca..f1384a8f3534 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -31,6 +31,7 @@ enum { ETHTOOL_MSG_RINGS_GET, ETHTOOL_MSG_RINGS_SET, ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 500dd5ad250b..ee232c11acae 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" @@ -106,3 +108,117 @@ const struct ethnl_request_ops ethnl_channels_request_ops = { .reply_size = channels_reply_size, .fill_reply = channels_fill_reply, }; + +/* CHANNELS_SET */ + +static const struct nla_policy +channels_set_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_U32 }, +}; + +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1]; + unsigned int from_channel, old_total, i; + struct ethtool_channels channels = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + u32 max_rx_in_use = 0; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_CHANNELS_MAX, channels_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_CHANNELS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_channels || !ops->set_channels) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_channels(dev, &channels); + old_total = channels.combined_count + + max(channels.rx_count, channels.tx_count); + + ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT], + &mod); + ethnl_update_u32(&channels.tx_count, tb[ETHTOOL_A_CHANNELS_TX_COUNT], + &mod); + ethnl_update_u32(&channels.other_count, + tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod); + ethnl_update_u32(&channels.combined_count, + tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new channel counts are within limits */ + if (channels.rx_count > channels.max_rx) + err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + else if (channels.tx_count > channels.max_tx) + err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + else if (channels.other_count > channels.max_other) + err_attr = 
tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + else if (channels.combined_count > channels.max_combined) + err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested channel count exceeeds maximum"); + goto out_ops; + } + + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings + */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); + return -EINVAL; + } + + /* Disabling channels, query zero-copy AF_XDP sockets */ + from_channel = channels.combined_count + + min(channels.rx_count, channels.tx_count); + for (i = from_channel; i < old_total; i++) + if (xdp_get_umem_from_qid(dev, i)) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); + return -EINVAL; + } + + ret = dev->ethtool_ops->set_channels(dev, &channels); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7b6969af5ae7..0b22741b2f8f 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -258,3 +258,34 @@ int __ethtool_get_link(struct net_device *dev) return netif_running(dev) && dev->ethtool_ops->get_link(dev); } + +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 7dc1163800a7..03946e16e623 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -29,5 +29,6 @@ int __ethtool_get_link(struct net_device *dev); bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings); +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1d5c1b6b81a4..06224a03139e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -929,37 +929,6 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); -static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) -{ - u32 dev_size, current_max = 0; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - while (dev_size--) - current_max = max(current_max, indir[dev_size]); - - *max = current_max; - -out: - kfree(indir); - return ret; -} - static 
noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index ac35513abeed..f61654b8f210 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -771,6 +771,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_CHANNELS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_channels, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 53bfe720ff1a..45aad99a6021 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -349,5 +349,6 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ -- cgit v1.2.3 From 546379b9a01b6bdb5b267f8e7433944d5364cbf2 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:48 +0100 Subject: ethtool: add CHANNELS_NTF notification Send ETHTOOL_MSG_CHANNELS_NTF notification whenever channel counts of a network device are modified using ETHTOOL_MSG_CHANNELS_SET netlink message or ETHTOOL_SCHANNELS ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 1 + include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/channels.c | 3 +++ net/ethtool/ioctl.c | 6 +++++- net/ethtool/netlink.c | 2 ++ 5 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7df7476cf310..31a601cafa3f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -220,6 +220,7 @@ Kernel to userspace: ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ``ETHTOOL_MSG_RINGS_NTF`` ring sizes ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts + ``ETHTOOL_MSG_CHANNELS_NTF`` channel counts ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index f1384a8f3534..c7c7a1a550af 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -59,6 +59,7 @@ enum { ETHTOOL_MSG_RINGS_GET_REPLY, ETHTOOL_MSG_RINGS_NTF, ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index ee232c11acae..8dc5485333a4 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -213,6 +213,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) } ret = dev->ethtool_ops->set_channels(dev, &channels); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); out_ops: ethnl_ops_complete(dev); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 06224a03139e..258840b19fb5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1649,6 +1649,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, u16 from_channel, to_channel; u32 max_rx_in_use = 0; unsigned 
int i; + int ret; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) return -EOPNOTSUPP; @@ -1680,7 +1681,10 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (xdp_get_umem_from_qid(dev, i)) return -EINVAL; - return dev->ethtool_ops->set_channels(dev, &channels); + ret = dev->ethtool_ops->set_channels(dev, &channels); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); + return ret; } static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index f61654b8f210..55c8ce4019d9 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -534,6 +534,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_NTF] = ðnl_channels_request_ops, }; /* default notification handler */ @@ -622,6 +623,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_CHANNELS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) -- cgit v1.2.3 From a8eceea84a3a3504e42f6495cf462027c5d19cb0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 12 Mar 2020 15:50:22 -0700 Subject: inet: Use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe@perches.com/ And by hand: net/ipv6/ip6_fib.c has a fallthrough comment outside of an #ifdef block that causes gcc to emit a warning if converted in-place. So move the new fallthrough; inside the containing #ifdef/#endif too. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 ++-- net/ipv4/ah4.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 6 +++--- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/icmp.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_pptp.c | 4 ++-- net/ipv4/nexthop.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 2 +- net/ipv6/addrconf.c | 6 ++---- net/ipv6/ah6.c | 2 +- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_fib.c | 8 ++++---- net/ipv6/ip6mr.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- net/ipv6/raw.c | 8 ++++---- net/ipv6/route.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 26 files changed, 41 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2fe295432c24..bd7b4e92e07f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,7 +872,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ - /* fall through */ + fallthrough; default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -886,7 +886,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* fall through */ + fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? 
SS_DISCONNECTING : SS_UNCONNECTED; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 974179b3b314..d99e1be94019 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -107,7 +107,7 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) if (optlen < 6) return -EINVAL; memcpy(daddr, optptr+optlen-4, 4); - /* Fall through */ + fallthrough; default: memset(optptr, 0, optlen); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 05eb42f347e8..687971d83b4e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1181,7 +1181,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - /* fall through */ + fallthrough; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e4632bd2026d..30fa42f5997d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1566,11 +1566,11 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } } ip_mc_up(in_dev); - /* fall through */ + fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; - /* fall through */ + fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); @@ -1588,7 +1588,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ - /* fall through */ + fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a803cdd9400a..e4c62b8f57a8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1962,7 +1962,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; @@ -1984,7 +1984,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: fi->fib_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: fi->fib_flags |= RTNH_F_LINKDOWN; break; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f369e7ce685b..fc61f51d87a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) case 3: if (!icmp_tag_validation(iph->protocol)) goto out; - /* fall through */ + fallthrough; case 0: info = ntohs(icmph->un.frag.mtu); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d84819893db9..aaaaf907e0d8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -333,7 +333,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk, switch (ret) { case NET_XMIT_CN: do_cn = true; - /* fall through */ + fallthrough; case NET_XMIT_SUCCESS: break; default: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6e68def66822..9cf83cc85e4a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1465,7 +1465,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; - /* fall through */ + fallthrough; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 4b2d49cc9f1a..0c72156130b6 100644 --- 
a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -173,7 +173,7 @@ static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ + fallthrough; case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_TIME_EXCEEDED: diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index b2aeb7bf5dac..3c25a467b3ef 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -168,7 +168,7 @@ pptp_outbound_pkt(struct sk_buff *skb, pr_debug("unknown outbound packet 0x%04x:%s\n", msg, msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ case PPTP_START_SESSION_REQUEST: @@ -271,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("unknown inbound packet %s\n", msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0]); - /* fall through */ + fallthrough; case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: case PPTP_STOP_SESSION_REQUEST: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d072c326dd64..fdfca534d094 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1327,7 +1327,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, case AF_UNSPEC: if (tb[NHA_GROUP]) break; - /* fallthrough */ + fallthrough; default: NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7134f76f840..5c57850fab4b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2251,7 +2251,7 @@ void tcp_set_state(struct sock *sk, int state) if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); - /* fall through */ + fallthrough; default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6b6b57000dad..bf4ced9273e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2865,7 +2865,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ - /* fall through */ + fallthrough; default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -6367,7 +6367,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) mptcp_incoming_options(sk, skb, &tp->rx_opt); break; } - /* fall through */ + fallthrough; case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: /* RFC 793 says to queue data in these states, @@ -6382,7 +6382,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 1; } } - /* Fall through */ + fallthrough; case TCP_ESTABLISHED: tcp_data_queue(sk, skb); queued = 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 52acf0bc2ee5..83a5d24e13b8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2072,7 +2072,7 @@ do_time_wait: } } /* to ACK */ - /* fall through */ + fallthrough; case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); break; @@ -2368,7 +2368,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) break; st->bucket = 0; st->state = TCP_SEQ_STATE_ESTABLISHED; - /* Fallthrough */ + fallthrough; case TCP_SEQ_STATE_ESTABLISHED: if (st->bucket > tcp_hashinfo.ehash_mask) break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a68e2ac37f26..2633fc231593 100644 --- 
a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2563,7 +2563,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: up->encap_rcv = xfrm4_udp_encap_rcv; #endif - /* FALLTHROUGH */ + fallthrough; case UDP_ENCAP_L2TPINUDP: up->encap_type = val; lock_sock(sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cb493e15959c..c614249dfb7d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3299,7 +3299,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); - /* fallthrough */ + fallthrough; case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: if (!ipv6_generate_stable_address(&addr, 0, idev)) addrconf_add_linklocal(idev, &addr, @@ -3517,9 +3517,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; run_pending = 1; - - /* fall through */ - + fallthrough; case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 871d6e52ec67..45e2adc56610 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -259,7 +259,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) case NEXTHDR_DEST: if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); - /* fall through */ + fallthrough; case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { net_dbg_ratelimited("overrun %sopts\n", diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ab5add0fe6b4..bcb9f5e62808 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -97,7 +97,7 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) break; - /* fall through */ + fallthrough; case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); return false; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ef408a5090a2..2688f3e82165 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -898,7 +898,7 @@ static int icmpv6_rcv(struct sk_buff *skb) hdr = icmp6_hdr(skb); /* to notify */ - /* fall through */ + fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 72abf892302f..46ed56719476 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2068,8 +2068,8 @@ static int fib6_walk_continue(struct fib6_walker *w) continue; } w->state = FWS_L; + fallthrough; #endif - /* fall through */ case FWS_L: left = rcu_dereference_protected(fn->left, 1); if (left) { @@ -2078,7 +2078,7 @@ static int fib6_walk_continue(struct fib6_walker *w) continue; } w->state = FWS_R; - /* fall through */ + fallthrough; case FWS_R: right = rcu_dereference_protected(fn->right, 1); if (right) { @@ -2088,7 +2088,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } w->state = FWS_C; w->leaf = rcu_dereference_protected(fn->leaf, 1); - /* fall through */ + fallthrough; case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; @@ -2107,7 +2107,7 @@ static int fib6_walk_continue(struct fib6_walker *w) } skip: w->state = FWS_U; - /* fall through */ + fallthrough; case FWS_U: if (fn == w->root) return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d6483926f449..65a54d74acc1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1691,7 +1691,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ADD_MFC: case MRT6_DEL_MFC: parent = 
-1; - /* fall through */ + fallthrough; case MRT6_ADD_MFC_PROXY: case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 53caf59c591e..4a3feccd5b10 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1782,7 +1782,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); - /* fallthrough */ + fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 22b80db6d882..da64550a5707 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -248,7 +248,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, /* Max length: 17 "POINTER=ffffffff " */ nf_log_buf_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ + fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_PKT_TOOBIG: case ICMPV6_TIME_EXCEED: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dfe5e603ffe1..0028aa1d7869 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1076,7 +1076,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -1099,7 +1099,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1161,7 +1161,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1184,7 +1184,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; - /* fall through */ + fallthrough; default: return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2931224b674e..2430c2f6819a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4370,7 +4370,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } - /* FALLTHROUGH */ + fallthrough; case IPSTATS_MIB_OUTNOROUTES: IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index eaf09e6b7844..413b3425ac66 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1742,7 +1742,7 @@ do_time_wait: } } /* to ACK */ - /* fall through */ + fallthrough; case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); break; -- cgit v1.2.3 From d831ee84bfc9173eecf30dbbc2553ae81b996c60 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 6 Mar 2020 08:59:23 +0000 Subject: bpf: Add bpf_xdp_output() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce new helper that reuses existing xdp perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct xdp_buff *' as a tracepoint argument. 
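For example, a tracing program that receives a struct xdp_buff * argument can push packet metadata (and, via the upper 32 bits of the flags argument, the first pkt_len bytes of the packet) to a BPF_MAP_TYPE_PERF_EVENT_ARRAY roughly as in the sketch below. The struct and map names are only illustrative and the minimal CO-RE struct declarations assume recent libbpf headers; the xdp_bpf2bpf selftest added by this patch follows the same pattern:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* Minimal CO-RE view of the kernel structs; only the members the
 * program dereferences need to be declared (illustrative only).
 */
struct net_device {
	int ifindex;
} __attribute__((preserve_access_index));

struct xdp_rxq_info {
	struct net_device *dev;
} __attribute__((preserve_access_index));

struct xdp_buff {
	void *data;
	void *data_end;
	struct xdp_rxq_info *rxq;
} __attribute__((preserve_access_index));

struct meta {
	int ifindex;
	int pkt_len;
};

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perf_buf_map SEC(".maps");

/* attach target for the XDP program to trace is resolved at load time */
SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	struct meta meta;

	meta.ifindex = xdp->rxq->dev->ifindex;
	meta.pkt_len = data_end - data;

	/* upper 32 bits of flags = number of packet bytes to append */
	bpf_xdp_output(xdp, &perf_buf_map,
		       ((__u64)meta.pkt_len << 32) | BPF_F_CURRENT_CPU,
		       &meta, sizeof(meta));
	return 0;
}

User space then collects the records with a perf buffer (perf_buffer__new()/perf_buffer__poll()), which is what the updated selftest does to verify the helper.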
Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial --- include/uapi/linux/bpf.h | 26 ++++++++++- kernel/bpf/verifier.c | 4 +- kernel/trace/bpf_trace.c | 3 ++ net/core/filter.c | 16 ++++++- tools/include/uapi/linux/bpf.h | 26 ++++++++++- .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 53 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 24 ++++++++++ 7 files changed, 148 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 55d376c53f7d..745f3cfdf3b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && func_id != BPF_FUNC_skb_output && - func_id != BPF_FUNC_perf_event_read_value) + func_id != BPF_FUNC_perf_event_read_value && + func_id != BPF_FUNC_xdp_output) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: case BPF_FUNC_skb_output: + case BPF_FUNC_xdp_output: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b5071c7e93ca..e619eedb5919 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { }; extern const struct bpf_func_proto bpf_skb_output_proto; +extern const struct bpf_func_proto bpf_xdp_output_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; + case BPF_FUNC_xdp_output: + return &bpf_xdp_output_proto; #endif default: return raw_tp_prog_func_proto(func_id, prog); diff --git a/net/core/filter.c b/net/core/filter.c index cd0a532db4e7..22219544410f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + if (unlikely(!xdp || + xdp_size > (unsigned long)(xdp->data_end - xdp->data))) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, xdp->data, @@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +static int bpf_xdp_output_btf_ids[5]; +const struct bpf_func_proto bpf_xdp_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_xdp_output_btf_ids, +}; + BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? sock_gen_cookie(skb->sk) : 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. 
* + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 4ba011031d4c..a0f688c37023 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -4,17 +4,51 @@ #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" +struct meta { + int ifindex; + int pkt_len; +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int duration = 0; + struct meta *meta = (struct meta *)data; + struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + + if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), + "check_size", "size %u < %zu\n", + size, sizeof(pkt_v4) + sizeof(*meta))) + return; + + if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + return; + + if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", + "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + return; + + if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), + "check_packet_content", "content not the same\n")) + return; + + *(bool *)ctx = true; +} + void test_xdp_bpf2bpf(void) { __u32 duration = 0, retval, size; char buf[128]; int err, pkt_fd, map_fd; + bool passed = false; struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; + struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void) if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) goto out; + /* Set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto out; + /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); @@ 
-60,6 +102,15 @@ void test_xdp_bpf2bpf(void) err, errno, retval, size)) goto out; + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out; + + CHECK_FAIL(!passed); + /* Verify test results */ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), "result", "fentry failed err %llu\n", @@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void) "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); out: + if (pb) + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 42dd2fedd588..a038e827f850 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -3,6 +3,8 @@ #include #include +char _license[] SEC("license") = "GPL"; + struct net_device { /* Structure does not need to contain all entries, * as "preserve_access_index" will use BTF to fix this... @@ -27,10 +29,32 @@ struct xdp_buff { struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); +struct meta { + int ifindex; + int pkt_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + __u64 test_result_fentry = 0; SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { + struct meta meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + meta.ifindex = xdp->rxq->dev->ifindex; + meta.pkt_len = data_end - data; + bpf_xdp_output(xdp, &perf_buf_map, + ((__u64) meta.pkt_len << 32) | + BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + test_result_fentry = xdp->rxq->dev->ifindex; return 0; } -- cgit v1.2.3 From 5ec82c49a21a3e50b0011d8c85c119e16c6413a0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 13 Mar 2020 11:25:34 +0000 Subject: ethtool: fix spelling mistake "exceeeds" -> "exceeds" There are a couple of spelling mistakes in NL_SET_ERR_MSG_ATTR messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- net/ethtool/channels.c | 2 +- net/ethtool/rings.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 8dc5485333a4..389924b65d05 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -189,7 +189,7 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) if (err_attr) { ret = -EINVAL; NL_SET_ERR_MSG_ATTR(info->extack, err_attr, - "requested channel count exceeeds maximum"); + "requested channel count exceeds maximum"); goto out_ops; } diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index c2ebf72be217..5422526f4eef 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -181,7 +181,7 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) if (err_attr) { ret = -EINVAL; NL_SET_ERR_MSG_ATTR(info->extack, err_attr, - "requested ring size exceeeds maximum"); + "requested ring size exceeds maximum"); goto out_ops; } -- cgit v1.2.3 From 6a64037d4bf252bb8cf13917320c8e001da8997a Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 12 Mar 2020 20:55:57 +0100 Subject: bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER, so all the dispatchers have the common name prefix. And also a small '_' cleanup for bpf_dispatcher_nopfunc function name. Signed-off-by: Björn Töpel Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 21 +++++++++++---------- include/linux/filter.h | 9 ++++----- net/core/filter.c | 5 ++--- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2f815e9f7d0..fe1f8b075378 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -522,7 +522,7 @@ struct bpf_dispatcher { u32 image_off; }; -static __always_inline unsigned int bpf_dispatcher_nopfunc( +static __always_inline unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -537,7 +537,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##func, \ + .func = &name##_func, \ .progs = {}, \ .num_progs = 0, \ .image = NULL, \ @@ -545,7 +545,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int name##func( \ + noinline unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ @@ -553,17 +553,18 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); { \ return bpf_func(ctx, insnsi); \ } \ - EXPORT_SYMBOL(name##func); \ - struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); + EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ + struct bpf_dispatcher bpf_dispatcher_##name = \ + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ - unsigned int name##func( \ + unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ - extern struct bpf_dispatcher name; -#define BPF_DISPATCHER_FUNC(name) 
name##func -#define BPF_DISPATCHER_PTR(name) (&name) + extern struct bpf_dispatcher bpf_dispatcher_##name; +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func +#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); struct bpf_image { @@ -589,7 +590,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) -#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b5e455d2f5..6249679275b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ret; }) #define BPF_PROG_RUN(prog, ctx) \ - __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that @@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, u32 ret; migrate_disable(); - ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } @@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } -DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) @@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, - BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); + return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); diff --git a/net/core/filter.c b/net/core/filter.c index 22219544410f..96350a743539 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8859,10 +8859,9 @@ const struct bpf_prog_ops sk_reuseport_prog_ops = { }; #endif /* CONFIG_INET */ -DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DEFINE_BPF_DISPATCHER(xdp) void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { - bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp), - prev_prog, prog); + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } -- cgit v1.2.3 From 965995b7d7bef00c7952460fcf835ab8feb747ff Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 14 Mar 2020 18:06:06 +0800 Subject: Bluetooth: L2CAP: remove set but not used variable 'credits' net/bluetooth/l2cap_core.c: In function l2cap_ecred_conn_req: net/bluetooth/l2cap_core.c:5848:6: warning: variable credits set but not used [-Wunused-but-set-variable] commit 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") involved this unused variable, remove it. 
Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5e6e35ab44dd..8b0fca39989d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5845,7 +5845,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, __le16 dcid[5]; } __packed pdu; struct l2cap_chan *chan, *pchan; - u16 credits, mtu, mps; + u16 mtu, mps; __le16 psm; u8 result, len = 0; int i, num_scid; @@ -5868,7 +5868,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, } psm = req->psm; - credits = 0; BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); -- cgit v1.2.3 From 14bc175d9c885c86239de3d730eea85ad67bfe7b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:56 +0200 Subject: net: sched: Allow extending set of supported RED flags The qdiscs RED, GRED, SFQ and CHOKE use different subsets of the same pool of global RED flags. These are passed in tc_red_qopt.flags. However none of these qdiscs validate the flag field, and just copy it over wholesale to internal structures, and later dump it back. (An exception is GRED, which does validate for VQs -- however not for the main setup.) A broken userspace can therefore configure a qdisc with arbitrary unsupported flags, and later expect to see the flags on qdisc dump. The current ABI therefore allows storage of several bits of custom data to qdisc instances of the types mentioned above. How many bits, depends on which flags are meaningful for the qdisc in question. E.g. SFQ recognizes flags ECN and HARDDROP, and the rest is not interpreted. If SFQ ever needs to support ADAPTATIVE, it needs another way of doing it, and at the same time it needs to retain the possibility to store 6 bits of uninterpreted data. Likewise RED, which adds a new flag later in this patchset. To that end, this patch adds a new function, red_get_flags(), to split the passed flags of RED-like qdiscs to flags and user bits, and red_validate_flags() to validate the resulting configuration. It further adds a new attribute, TCA_RED_FLAGS, to pass arbitrary flags. Signed-off-by: Petr Machata Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/net/red.h | 33 ++++++++++++++++++++++++++++++++ include/uapi/linux/pkt_sched.h | 16 ++++++++++++++++ net/sched/sch_red.c | 43 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 89 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/red.h b/include/net/red.h index 9665582c4687..6a2aaa6c7c41 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -179,6 +179,39 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) return true; } +static inline int red_get_flags(unsigned char qopt_flags, + unsigned char historic_mask, + struct nlattr *flags_attr, + unsigned char supported_mask, + struct nla_bitfield32 *p_flags, + unsigned char *p_userbits, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 flags; + + if (qopt_flags && flags_attr) { + NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute"); + return -EINVAL; + } + + if (flags_attr) { + flags = nla_get_bitfield32(flags_attr); + } else { + flags.selector = historic_mask; + flags.value = qopt_flags & historic_mask; + } + + *p_flags = flags; + *p_userbits = qopt_flags & ~historic_mask; + return 0; +} + +static inline int red_validate_flags(unsigned char flags, + struct netlink_ext_ack *extack) +{ + return 0; +} + static inline void red_set_parms(struct red_parms *p, u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, u8 Scell_log, u8 *stab, u32 max_P) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index bbe791b24168..6325507935ea 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ enum { TCA_RED_PARMS, TCA_RED_STAB, TCA_RED_MAX_P, + TCA_RED_FLAGS, /* bitfield32 */ __TCA_RED_MAX, }; @@ -268,12 +269,27 @@ struct tc_red_qopt { unsigned char Wlog; /* log(W) */ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ unsigned char Scell_log; /* cell size for idle damping */ + + /* This field can be used for flags that a RED-like qdisc has + * historically supported. E.g. when configuring RED, it can be used for + * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN, + * HARDDROP. Etc. Because this field has not been validated, and is + * copied back on dump, any bits besides those to which a given qdisc + * has assigned a historical meaning need to be considered for free use + * by userspace tools. + * + * Any further flags need to be passed differently, e.g. through an + * attribute (such as TCA_RED_FLAGS above). Such attribute should allow + * passing both recent and historic flags in one value. + */ unsigned char flags; #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 }; +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) + struct tc_red_xstats { __u32 early; /* Early drops */ __u32 pdrop; /* Drops due to queue limits */ diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1695421333e3..d4ce111704dc 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -35,7 +35,11 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ + unsigned char flags; + /* Non-flags in tc_red_qopt.flags. 
*/ + unsigned char userbits; + struct timer_list adapt_timer; struct Qdisc *sch; struct red_parms parms; @@ -44,6 +48,8 @@ struct red_sched_data { struct Qdisc *qdisc; }; +static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS; + static inline int red_use_ecn(struct red_sched_data *q) { return q->flags & TC_RED_ECN; @@ -183,9 +189,12 @@ static void red_destroy(struct Qdisc *sch) } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { + [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS }, [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, + [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &red_supported_flags }, }; static int red_change(struct Qdisc *sch, struct nlattr *opt, @@ -194,7 +203,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; + struct nla_bitfield32 flags_bf; struct tc_red_qopt *ctl; + unsigned char userbits; + unsigned char flags; int err; u32 max_P; @@ -216,6 +228,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) return -EINVAL; + err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, + tb[TCA_RED_FLAGS], red_supported_flags, + &flags_bf, &userbits, extack); + if (err) + return err; + if (ctl->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit, extack); @@ -227,7 +245,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, } sch_tree_lock(sch); - q->flags = ctl->flags; + + flags = (q->flags & ~flags_bf.selector) | flags_bf.value; + err = red_validate_flags(flags, extack); + if (err) + goto unlock_out; + + q->flags = flags; + q->userbits = userbits; q->limit = ctl->limit; if (child) { qdisc_tree_flush_backlog(q->qdisc); @@ -256,6 +281,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (old_child) qdisc_put(old_child); return 0; + +unlock_out: + sch_tree_unlock(sch); + if (child) + qdisc_put(child); + return err; } static inline void red_adaptative_timer(struct timer_list *t) @@ -299,10 +330,15 @@ static int red_dump_offload_stats(struct Qdisc *sch) static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); + struct nla_bitfield32 flags_bf = { + .selector = red_supported_flags, + .value = q->flags, + }; struct nlattr *opts = NULL; struct tc_red_qopt opt = { .limit = q->limit, - .flags = q->flags, + .flags = (q->flags & TC_RED_HISTORIC_FLAGS) | + q->userbits, .qth_min = q->parms.qth_min >> q->parms.Wlog, .qth_max = q->parms.qth_max >> q->parms.Wlog, .Wlog = q->parms.Wlog, @@ -319,7 +355,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || - nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P)) + nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || + nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit v1.2.3 From 0a7fad2376ba6b37c6b1a1072ed2a2381d82cd18 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:57 +0200 Subject: net: sched: RED: Introduce an ECN nodrop mode When the RED Qdisc is currently configured to enable ECN, the RED algorithm is used to decide whether a certain SKB should be marked. If that SKB is not ECN-capable, it is early-dropped. 
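In rough C terms, the behaviour just described corresponds to something like the following (a simplified sketch only, not the exact sch_red.c enqueue code; red_use_ecn() and INET_ECN_set_ce() are the existing kernel helpers, while the red_mark_or_drop() wrapper name is made up purely for illustration):

/* Sketch, assuming kernel context (struct red_sched_data as in sch_red.c). */
static bool red_mark_or_drop(struct red_sched_data *q, struct sk_buff *skb)
{
	if (red_use_ecn(q) && INET_ECN_set_ce(skb)) {
		q->stats.prob_mark++;	/* ECN-capable: mark CE, keep the packet */
		return true;		/* enqueue */
	}
	q->stats.prob_drop++;		/* non-ECT or ECN disabled: early drop */
	return false;			/* drop */
}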
It is also possible to keep all traffic in the queue, and just mark the ECN-capable subset of it, as appropriate under the RED algorithm. Some switches support this mode, and some installations make use of it. To that end, add a new RED flag, TC_RED_NODROP. When the Qdisc is configured with this flag, non-ECT traffic is enqueued instead of being early-dropped. Signed-off-by: Petr Machata Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/net/red.h | 5 +++++ include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_red.c | 31 +++++++++++++++++++++++++------ 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dbc89452f90b..1db8b27d4515 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -740,6 +740,7 @@ struct tc_red_qopt_offload_params { u32 limit; bool is_ecn; bool is_harddrop; + bool is_nodrop; struct gnet_stats_queue *qstats; }; diff --git a/include/net/red.h b/include/net/red.h index 6a2aaa6c7c41..fc455445f4b2 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -209,6 +209,11 @@ static inline int red_get_flags(unsigned char qopt_flags, static inline int red_validate_flags(unsigned char flags, struct netlink_ext_ack *extack) { + if ((flags & TC_RED_NODROP) && !(flags & TC_RED_ECN)) { + NL_SET_ERR_MSG_MOD(extack, "nodrop mode is only meaningful with ECN"); + return -EINVAL; + } + return 0; } diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 6325507935ea..ea39287d59c8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -286,6 +286,7 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_NODROP 8 }; #define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index d4ce111704dc..3ef0a4f7399b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -48,7 +48,7 @@ struct red_sched_data { struct Qdisc *qdisc; }; -static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS; +static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP; static inline int red_use_ecn(struct red_sched_data *q) { @@ -60,6 +60,11 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } +static int red_use_nodrop(struct red_sched_data *q) +{ + return q->flags & TC_RED_NODROP; +} + static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -80,23 +85,36 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_PROB_MARK: qdisc_qstats_overlimit(sch); - if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { + if (!red_use_ecn(q)) { q->stats.prob_drop++; goto congestion_drop; } - q->stats.prob_mark++; + if (INET_ECN_set_ce(skb)) { + q->stats.prob_mark++; + } else if (!red_use_nodrop(q)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + /* Non-ECT packet in ECN nodrop mode: queue it. */ break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); - if (red_use_harddrop(q) || !red_use_ecn(q) || - !INET_ECN_set_ce(skb)) { + if (red_use_harddrop(q) || !red_use_ecn(q)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + if (INET_ECN_set_ce(skb)) { + q->stats.forced_mark++; + } else if (!red_use_nodrop(q)) { q->stats.forced_drop++; goto congestion_drop; } - q->stats.forced_mark++; + /* Non-ECT packet in ECN nodrop mode: queue it. 
*/ break; } @@ -171,6 +189,7 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.set.limit = q->limit; opt.set.is_ecn = red_use_ecn(q); opt.set.is_harddrop = red_use_harddrop(q); + opt.set.is_nodrop = red_use_nodrop(q); opt.set.qstats = &sch->qstats; } else { opt.command = TC_RED_DESTROY; -- cgit v1.2.3 From e228c5c0882e809e4fb3eafd07ec25ff50f65ac5 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 13 Mar 2020 10:18:02 +0700 Subject: tipc: simplify trivial boolean return Checking and returning 'true' boolean is useless as it will be returning at end of function Signed-off-by: Hoang Le Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0d515d20b056..4d0e0bdd997b 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -736,9 +736,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destport(msg, dport); *err = TIPC_OK; - if (!skb_cloned(skb)) - return true; - return true; } -- cgit v1.2.3 From 746a1eda682cb7b5cd22f1b437da2121af64fbfe Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 13 Mar 2020 10:18:03 +0700 Subject: tipc: add NULL pointer check to prevent kernel oops Calling: tipc_node_link_down()-> - tipc_node_write_unlock()->tipc_mon_peer_down() - tipc_mon_peer_down() just after disabling bearer could be caused kernel oops. Fix this by adding a sanity check to make sure valid memory access. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/monitor.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 58708b4c7719..6dce2abf436e 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -322,9 +322,13 @@ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *prev, *head; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) @@ -407,11 +411,15 @@ exit: void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *head; struct tipc_mon_domain *dom; int applied; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); peer = get_peer(mon, addr); if (!peer) { -- cgit v1.2.3 From 58b09919626bf9067345289212ec030c61eb1034 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 Mar 2020 16:52:41 +0100 Subject: mptcp: create msk early This change moves the mptcp socket allocation from mptcp_accept() to subflow_syn_recv_sock(), so that subflow->conn is now always set for the non fallback scenario. It allows cleaning up a bit mptcp_accept() reducing the additional locking and will allow fourther cleanup in the next patch. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 83 ++++++++++++++++++++++++++++------------------------ net/mptcp/protocol.h | 4 +-- net/mptcp/subflow.c | 32 +++++++++++++------- net/mptcp/token.c | 31 ++------------------ 4 files changed, 70 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c0cef07f4382..04c3caed92df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -static struct sock *mptcp_sk_clone_lock(const struct sock *sk) +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) { + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_sock *msk; + u64 ack_seq; if (!nsk) return NULL; @@ -832,6 +835,36 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk) inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + __mptcp_init_sock(nsk); + + msk = mptcp_sk(nsk); + msk->local_key = subflow_req->local_key; + msk->token = subflow_req->token; + msk->subflow = NULL; + + if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + bh_unlock_sock(nsk); + + /* we can't call into mptcp_close() here - possible BH context + * free the sock directly + */ + nsk->sk_prot->destroy(nsk); + sk_free(nsk); + return NULL; + } + + msk->write_seq = subflow_req->idsn + 1; + if (subflow_req->remote_key_valid) { + msk->can_ack = true; + msk->remote_key = subflow_req->remote_key; + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + msk->ack_seq = ack_seq; + } + bh_unlock_sock(nsk); + + /* keep a single reference */ + __sock_put(nsk); return nsk; } @@ -859,40 +892,26 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; struct sock *ssk = newsk; - u64 ack_seq; subflow = mptcp_subflow_ctx(newsk); - lock_sock(sk); + new_mptcp_sock = subflow->conn; - local_bh_disable(); - new_mptcp_sock = mptcp_sk_clone_lock(sk); - if (!new_mptcp_sock) { - *err = -ENOBUFS; - local_bh_enable(); - release_sock(sk); - mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0); - tcp_close(newsk, 0); - return NULL; + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + return newsk; } - __mptcp_init_sock(new_mptcp_sock); + /* acquire the 2nd reference for the owning socket */ + sock_hold(new_mptcp_sock); + local_bh_disable(); + bh_lock_sock(new_mptcp_sock); msk = mptcp_sk(new_mptcp_sock); - msk->local_key = subflow->local_key; - msk->token = subflow->token; - msk->subflow = NULL; msk->first = newsk; - mptcp_token_update_accept(newsk, new_mptcp_sock); - - msk->write_seq = subflow->idsn + 1; - if (subflow->can_ack) { - msk->can_ack = true; - msk->remote_key = subflow->remote_key; - mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); - ack_seq++; - msk->ack_seq = ack_seq; - } newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); @@ -903,18 +922,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV); bh_unlock_sock(new_mptcp_sock); local_bh_enable(); - release_sock(sk); - - /* the subflow can already receive packet, avoid racing with - * the receive path and process the pending ones - */ - lock_sock(ssk); - subflow->rel_write_seq = 1; - 
subflow->tcp_sock = ssk; - subflow->conn = new_mptcp_sock; - if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue))) - mptcp_subflow_data_available(ssk); - release_sock(ssk); } return newsk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 313558fa8185..9baf6fcba914 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -193,6 +193,7 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif +struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); @@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(u32 token); int mptcp_token_new_connect(struct sock *sk); -int mptcp_token_new_accept(u32 token); -void mptcp_token_update_accept(struct sock *sk, struct sock *conn); +int mptcp_token_new_accept(u32 token, struct sock *conn); void mptcp_token_destroy(u32 token); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0de2a44bdaa0..047b088e4617 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -182,6 +182,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct tcp_options_received opt_rx; + struct sock *new_msk = NULL; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -197,7 +198,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * out-of-order pkt, which will not carry the MP_CAPABLE * opt even on mptcp enabled paths */ - goto create_child; + goto create_msk; } opt_rx.mptcp.mp_capable = 0; @@ -207,7 +208,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_req->remote_key_valid = 1; } else { subflow_req->mp_capable = 0; + goto create_child; } + +create_msk: + new_msk = mptcp_sk_clone(listener->conn, req); + if (!new_msk) + subflow_req->mp_capable = 0; } create_child: @@ -221,22 +228,22 @@ create_child: * handshake */ if (!ctx) - return child; + goto out; if (ctx->mp_capable) { - if (mptcp_token_new_accept(ctx->token)) - goto close_child; + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + ctx->conn = new_msk; + new_msk = NULL; } } +out: + /* dispose of the left over mptcp master, if any */ + if (unlikely(new_msk)) + sock_put(new_msk); return child; - -close_child: - pr_debug("closing child socket"); - tcp_send_active_reset(child, GFP_ATOMIC); - inet_csk_prepare_forced_close(child); - tcp_done(child); - return NULL; } static struct inet_connection_sock_af_ops subflow_specific; @@ -793,6 +800,9 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_write_space = old_ctx->tcp_write_space; + new_ctx->rel_write_seq = 1; + new_ctx->tcp_sock = newsk; + new_ctx->mp_capable = 1; new_ctx->fourth_ack = subflow_req->remote_key_valid; new_ctx->can_ack = subflow_req->remote_key_valid; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 84d887806090..b71b53c0ac8d 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -128,45 +128,18 @@ int mptcp_token_new_connect(struct sock *sk) * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. 
- * - * We don't have an mptcp socket yet at that point. - * This is paired with mptcp_token_update_accept, called on accept(). */ -int mptcp_token_new_accept(u32 token) +int mptcp_token_new_accept(u32 token, struct sock *conn) { int err; spin_lock_bh(&token_tree_lock); - err = radix_tree_insert(&token_tree, token, &token_used); + err = radix_tree_insert(&token_tree, token, conn); spin_unlock_bh(&token_tree_lock); return err; } -/** - * mptcp_token_update_accept - update token to map to mptcp socket - * @conn: the new struct mptcp_sock - * @sk: the initial subflow for this mptcp socket - * - * Called when the first mptcp socket is created on accept to - * refresh the dummy mapping (done to reserve the token) with - * the mptcp_socket structure that wasn't allocated before. - */ -void mptcp_token_update_accept(struct sock *sk, struct sock *conn) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - void __rcu **slot; - - spin_lock_bh(&token_tree_lock); - slot = radix_tree_lookup_slot(&token_tree, subflow->token); - WARN_ON_ONCE(!slot); - if (slot) { - WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used); - radix_tree_replace_slot(&token_tree, slot, conn); - } - spin_unlock_bh(&token_tree_lock); -} - /** * mptcp_token_destroy_request - remove mptcp connection/token * @token - token of mptcp connection to remove -- cgit v1.2.3 From dc093db5cc052b7879807a1d2906ed2f54bf386c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 13 Mar 2020 16:52:42 +0100 Subject: mptcp: drop unneeded checks After the previous patch subflow->conn is always != NULL and is never changed. We can drop a bunch of now unneeded checks. v1 -> v2: - rebased on top of commit 2398e3991bda ("mptcp: always include dack if possible.") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- net/mptcp/options.c | 14 ++------------ net/mptcp/subflow.c | 18 +++++++----------- 2 files changed, 9 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 9c71f427e6e3..63c8ee49cef2 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -336,7 +336,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, unsigned int ack_size; bool ret = false; bool can_ack; - u64 ack_seq; u8 tcp_fin; if (skb) { @@ -368,16 +367,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, can_ack = true; opts->ext_copy.use_ack = 0; msk = mptcp_sk(subflow->conn); - if (likely(msk && READ_ONCE(msk->can_ack))) { - ack_seq = msk->ack_seq; - } else if (subflow->can_ack) { - mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); - ack_seq++; - } else { - can_ack = false; - } - - if (unlikely(!can_ack)) { + if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; } @@ -390,7 +380,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size += ack_size; - opts->ext_copy.data_ack = ack_seq; + opts->ext_copy.data_ack = msk->ack_seq; opts->ext_copy.ack64 = 1; opts->ext_copy.use_ack = 1; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 047b088e4617..8434c7f5f712 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -112,7 +112,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (subflow->conn && !subflow->conn_finished) { + if (!subflow->conn_finished) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); @@ -439,9 +439,6 @@ static bool subflow_check_data_avail(struct sock *ssk) if (subflow->data_avail) return true; - if (!subflow->conn) - return false; - msk = mptcp_sk(subflow->conn); for (;;) { u32 map_remaining; @@ -561,11 +558,10 @@ static void subflow_data_ready(struct sock *sk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; - if (!parent || !subflow->mp_capable) { + if (!subflow->mp_capable) { subflow->tcp_data_ready(sk); - if (parent) - parent->sk_data_ready(parent); + parent->sk_data_ready(parent); return; } @@ -579,7 +575,7 @@ static void subflow_write_space(struct sock *sk) struct sock *parent = subflow->conn; sk_stream_write_space(sk); - if (parent && sk_stream_is_writeable(sk)) { + if (sk_stream_is_writeable(sk)) { set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags); smp_mb__after_atomic(); /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */ @@ -694,7 +690,7 @@ static bool subflow_is_done(const struct sock *sk) static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = READ_ONCE(subflow->conn); + struct sock *parent = subflow->conn; __subflow_state_change(sk); @@ -702,10 +698,10 @@ static void subflow_state_change(struct sock *sk) * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. 
*/ - if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk)) + if (subflow->mp_capable && mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); - if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) && + if (!(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; parent->sk_shutdown |= RCV_SHUTDOWN; -- cgit v1.2.3 From c3c831b0a241c6003c82bc1f2d55460d3f4c0ee5 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:15:18 +0200 Subject: netfilter: flowtable: Use nf_flow_offload_tuple for stats as well This patch doesn't change any functionality. Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 42b73a084a63..88695ff44e76 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -574,6 +574,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir, int priority, int cmd, + struct flow_stats *stats, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; @@ -598,6 +599,9 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, } mutex_unlock(&flowtable->flow_block_lock); + if (cmd == FLOW_CLS_STATS) + memcpy(stats, &cls_flow.stats, sizeof(*stats)); + return i; } @@ -607,7 +611,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, { return nf_flow_offload_tuple(offload->flowtable, offload->flow, flow_rule, dir, offload->priority, - FLOW_CLS_REPLACE, + FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } @@ -615,7 +619,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_DESTROY, + offload->priority, FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } @@ -661,21 +665,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir, struct flow_stats *stats) { - struct nf_flowtable *flowtable = offload->flowtable; - struct flow_cls_offload cls_flow = {}; - struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; - __be16 proto = ETH_P_ALL; - - nf_flow_offload_init(&cls_flow, proto, offload->priority, - FLOW_CLS_STATS, - &offload->flow->tuplehash[dir].tuple, &extack); - - mutex_lock(&flowtable->flow_block_lock); - list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); - mutex_unlock(&flowtable->flow_block_lock); - memcpy(stats, &cls_flow.stats, sizeof(*stats)); + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, + offload->priority, FLOW_CLS_STATS, stats, + &offload->flowtable->flow_block.cb_list); } static void flow_offload_work_stats(struct flow_offload_work *offload) -- cgit v1.2.3 From 68983a354a655c35d3fb204489d383a2a051fda7 Mon Sep 17 00:00:00 2001 From: Manoj Basapathi Date: Thu, 6 Feb 2020 16:37:29 +0530 Subject: netfilter: xtables: Add snapshot of hardidletimer target This is a snapshot of hardidletimer netfilter target. This patch implements a hardidletimer Xtables target that can be used to identify when interfaces have been idle for a certain period of time. 
Timers are identified by labels and are created when a rule is set with a new label. The rules also take a timeout value (in seconds) as an option. If more than one rule uses the same timer label, the timer will be restarted whenever any of the rules get a hit. One entry for each timer is created in sysfs. This attribute contains the timer remaining for the timer to expire. The attributes are located under the xt_idletimer class: /sys/class/xt_idletimer/timers/
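As a rough illustration of how userspace might consume that attribute, the sketch below reads the remaining time for one timer (hedged: the timer label "uplink" is hypothetical, and the code assumes the attribute prints the remaining seconds as a plain decimal number, as the text above describes):

#include <stdio.h>

int main(void)
{
	/* Read the time left before the hypothetical "uplink" timer expires. */
	FILE *f = fopen("/sys/class/xt_idletimer/timers/uplink", "r");
	unsigned long remaining;

	if (!f) {
		perror("xt_idletimer attribute");
		return 1;
	}
	if (fscanf(f, "%lu", &remaining) == 1)
		printf("seconds until idle: %lu\n", remaining);
	fclose(f);
	return 0;
}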