From 2303149d584feade8074295b717451b36ac63307 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Tue, 5 Mar 2024 23:34:38 +0000 Subject: scsi: qedf: Replace deprecated strncpy() with strscpy() We expect slowpath_params.name to be NUL-terminated based on its future usage with other string APIs: | static int qed_slowpath_start(struct qed_dev *cdev, | struct qed_slowpath_params *params) ... | strscpy(drv_version.name, params->name, | MCP_DRV_VER_STR_SIZE - 4); Moreover, NUL-padding is not necessary as the only use for this slowpath name parameter is to copy into the drv_version.name field. Also, let's prefer using strscpy(src, dest, sizeof(src)) in two instances (one of which is outside of the scsi system but it is trivial and related to this patch). We can see the drv_version.name size here: | struct qed_mcp_drv_version { | u32 version; | u8 name[MCP_DRV_VER_STR_SIZE - 4]; | }; Reviewed-by: Kees Cook Signed-off-by: Justin Stitt Link: https://lore.kernel.org/r/20240305-strncpy-drivers-scsi-mpi3mr-mpi3mr_fw-c-v3-3-5b78a13ff984@google.com Signed-off-by: Martin K. Petersen --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c278f8893042..d39e198fe8db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1351,7 +1351,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, (params->drv_rev << 8) | (params->drv_eng); strscpy(drv_version.name, params->name, - MCP_DRV_VER_STR_SIZE - 4); + sizeof(drv_version.name)); rc = qed_mcp_send_drv_version(hwfn, hwfn->p_main_ptt, &drv_version); if (rc) { -- cgit v1.2.3 From 19c5c04d3178a8a4ca803369aa4948dad1ece902 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Mar 2024 16:00:57 +0200 Subject: net: stmmac: dwmac-rk: Remove unused of_gpio.h of_gpio.h is deprecated and subject to remove. The driver doesn't use it, simply remove the unused header. Signed-off-by: Andy Shevchenko Reviewed-by: Sebastian Reichel Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 382e8de1255d..7ae04d8d291c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -12,10 +12,8 @@ #include #include #include -#include #include #include -#include #include #include #include -- cgit v1.2.3 From 237bb5f7f7f55ec5f773469a974c61a49c298625 Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Wed, 13 Mar 2024 11:34:36 -0400 Subject: cxgb4: unnecessary check for 0 in the free_sge_txq_uld() function The free_sge_txq_old() function has an unnecessary txq check of 0. This check is not necessary, since the txq pointer is initialized by the uldtxq[i] address from the operation &txq_info->uldtxq[i], which ensures that txq is not equal to 0. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ab677ff4ad15 ("cxgb4: Allocate Tx queues dynamically") Signed-off-by: Mikhail Lobanov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 17faac715882..5c13bcb4550d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -406,7 +406,7 @@ free_sge_txq_uld(struct adapter *adap, struct sge_uld_txq_info *txq_info) for (i = 0; i < nq; i++) { struct sge_uld_txq *txq = &txq_info->uldtxq[i]; - if (txq && txq->q.desc) { + if (txq->q.desc) { tasklet_kill(&txq->qresume_tsk); t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0, txq->q.cntxt_id); -- cgit v1.2.3 From 8c67b7a914cca1a5a8e1446f1b684df928ebf963 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:07 +0100 Subject: ice: remove eswitch changing queues algorithm Changing queues used by eswitch will be done through PF netdev. There is no need to reserve queues if the number of used queues is known. Reviewed-by: Wojciech Drewek Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 6 ----- drivers/net/ethernet/intel/ice/ice_eswitch.c | 34 ---------------------------- drivers/net/ethernet/intel/ice/ice_eswitch.h | 4 ---- drivers/net/ethernet/intel/ice/ice_sriov.c | 3 --- 4 files changed, 47 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 365c03d1c462..9bb435b4338f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -527,12 +527,6 @@ struct ice_eswitch { struct ice_esw_br_offloads *br_offloads; struct xarray reprs; bool is_running; - /* struct to allow cp queues management optimization */ - struct { - int to_reach; - int value; - bool is_reaching; - } qs; }; struct ice_agg_node { diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 9069725c71b4..2e999f801c0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -455,8 +455,6 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) return -ENODEV; ctrl_vsi = pf->eswitch.control_vsi; - /* cp VSI is createad with 1 queue as default */ - pf->eswitch.qs.value = 1; pf->eswitch.uplink_vsi = uplink_vsi; if (ice_eswitch_setup_env(pf)) @@ -489,7 +487,6 @@ static void ice_eswitch_disable_switchdev(struct ice_pf *pf) ice_vsi_release(ctrl_vsi); pf->eswitch.is_running = false; - pf->eswitch.qs.is_reaching = false; } /** @@ -620,18 +617,6 @@ ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) struct ice_vsi *cp = eswitch->control_vsi; int queues = 0; - if (eswitch->qs.is_reaching) { - if (eswitch->qs.to_reach >= eswitch->qs.value + change) { - queues = eswitch->qs.to_reach; - eswitch->qs.is_reaching = false; - } else { - queues = 0; - } - } else if ((change > 0 && cp->alloc_txq <= eswitch->qs.value) || - change < 0) { - queues = cp->alloc_txq + change; - } - if (queues) { cp->req_txq = queues; cp->req_rxq = queues; @@ -643,7 +628,6 @@ ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) ice_vsi_open(cp); } - eswitch->qs.value += change; ice_eswitch_remap_rings_to_vectors(eswitch); } @@ -661,8 +645,6 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) err = ice_eswitch_enable_switchdev(pf); if (err) return err; 
- /* Control plane VSI is created with 1 queue as default */ - pf->eswitch.qs.to_reach -= 1; change = 0; } @@ -756,19 +738,3 @@ int ice_eswitch_rebuild(struct ice_pf *pf) return 0; } - -/** - * ice_eswitch_reserve_cp_queues - reserve control plane VSI queues - * @pf: pointer to PF structure - * @change: how many more (or less) queues is needed - * - * Remember to call ice_eswitch_attach/detach() the "change" times. - */ -void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) -{ - if (pf->eswitch.qs.value + change < 0) - return; - - pf->eswitch.qs.to_reach = pf->eswitch.qs.value + change; - pf->eswitch.qs.is_reaching = true; -} diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 1a288a03a79a..59d51c0d14e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -26,7 +26,6 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); -void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change); #else /* CONFIG_ICE_SWITCHDEV */ static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } @@ -77,8 +76,5 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { return NETDEV_TX_BUSY; } - -static inline void -ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) { } #endif /* CONFIG_ICE_SWITCHDEV */ #endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a958fcf3e6be..65e1986af777 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -170,8 +170,6 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - ice_eswitch_reserve_cp_queues(pf, -ice_get_num_vfs(pf)); - mutex_lock(&vfs->table_lock); ice_for_each_vf(pf, bkt, vf) { @@ -897,7 +895,6 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } - ice_eswitch_reserve_cp_queues(pf, num_vfs); ret = ice_start_vfs(pf); if (ret) { dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret); -- cgit v1.2.3 From defd52455aee4a0922e45155d6a348f02a99b775 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:08 +0100 Subject: ice: do Tx through PF netdev in slow-path Tx can be done using PF netdev. Checks before Tx are unnecessary. Checking if switchdev mode is set seems too defensive (there is no PR netdev in legacy mode). If corresponding VF is disabled or during reset, PR netdev also should be down. 
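For illustration, the slow-path Tx after this change reduces to the metadata_dst redirect pattern sketched here; the helper get_repr_md_dst() is hypothetical and only stands in for fetching the representor's preallocated metadata dst (the diff below keeps it in repr->dst):

static netdev_tx_t repr_xmit_sketch(struct sk_buff *skb, struct net_device *netdev)
{
        struct metadata_dst *md_dst = get_repr_md_dst(netdev); /* hypothetical lookup */

        skb_dst_drop(skb);                        /* drop any previously attached dst */
        dst_hold(&md_dst->dst);                   /* take a reference for this skb */
        skb_dst_set(skb, &md_dst->dst);           /* attach the per-representor metadata dst */
        skb->dev = md_dst->u.port_info.lower_dev; /* retarget the frame at the uplink netdev */

        return dev_queue_xmit(skb);               /* requeue through the regular stack path */
}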
Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 26 ++++++-------------------- drivers/net/ethernet/intel/ice/ice_repr.c | 12 ------------ drivers/net/ethernet/intel/ice/ice_repr.h | 2 -- 3 files changed, 6 insertions(+), 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 2e999f801c0a..8ad271534d80 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -236,7 +236,7 @@ ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) */ static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; struct ice_vsi *vsi = repr->src_vsi; struct metadata_dst *dst; @@ -255,12 +255,11 @@ static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) netif_napi_add(repr->netdev, &repr->q_vector->napi, ice_napi_poll); - netif_keep_dst(repr->netdev); + netif_keep_dst(uplink_vsi->netdev); dst = repr->dst; dst->u.port_info.port_id = vsi->vsi_num; - dst->u.port_info.lower_dev = repr->netdev; - ice_repr_set_traffic_vsi(repr, ctrl_vsi); + dst->u.port_info.lower_dev = uplink_vsi->netdev; return 0; @@ -318,27 +317,14 @@ void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct ice_netdev_priv *np; - struct ice_repr *repr; - struct ice_vsi *vsi; - - np = netdev_priv(netdev); - vsi = np->vsi; - - if (!vsi || !ice_is_switchdev_running(vsi->back)) - return NETDEV_TX_BUSY; - - if (ice_is_reset_in_progress(vsi->back->state) || - test_bit(ICE_VF_DIS, vsi->back->state)) - return NETDEV_TX_BUSY; + struct ice_repr *repr = ice_netdev_to_repr(netdev); - repr = ice_netdev_to_repr(netdev); skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); - skb->queue_mapping = repr->q_id; + skb->dev = repr->dst->u.port_info.lower_dev; - return ice_start_xmit(skb, netdev); + return dev_queue_xmit(skb); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 5f30fb131f74..6191bee6c536 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -439,15 +439,3 @@ void ice_repr_stop_tx_queues(struct ice_repr *repr) netif_carrier_off(repr->netdev); netif_tx_stop_all_queues(repr->netdev); } - -/** - * ice_repr_set_traffic_vsi - set traffic VSI for port representor - * @repr: repr on with VSI will be set - * @vsi: pointer to VSI that will be used by port representor to pass traffic - */ -void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi) -{ - struct ice_netdev_priv *np = netdev_priv(repr->netdev); - - np->vsi = vsi; -} diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index f9aede315716..b107e058d538 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -28,8 +28,6 @@ void ice_repr_rem_vf(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); -void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi); - struct ice_repr 
*ice_netdev_to_repr(struct net_device *netdev); bool ice_is_port_repr_netdev(const struct net_device *netdev); -- cgit v1.2.3 From 50d62022f45580e2fc9b62fca486e6d0ea287c40 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:09 +0100 Subject: ice: default Tx rule instead of to queue Steer all packets that miss other rules to PF VSI. Previously in switchdev mode, PF VSI received missed packets, but only ones marked as Rx. Now it is receiving all missed packets. To queue rule per PR isn't needed, because we use PF VSI instead of control VSI now, and it's already correctly configured. Add flag to correctly set LAN_EN bit in default Tx rule. It shouldn't allow packet to go outside when there is a match. Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 107 ++++----------------------- drivers/net/ethernet/intel/ice/ice_repr.h | 4 - drivers/net/ethernet/intel/ice/ice_switch.c | 4 + drivers/net/ethernet/intel/ice/ice_switch.h | 5 +- 4 files changed, 23 insertions(+), 97 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 8ad271534d80..50b3de700837 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -10,85 +10,6 @@ #include "ice_devlink.h" #include "ice_tc_lib.h" -/** - * ice_eswitch_del_sp_rules - delete adv rules added on PRs - * @pf: pointer to the PF struct - * - * Delete all advanced rules that were used to forward packets with the - * device's VSI index to the corresponding eswitch ctrl VSI queue. - */ -static void ice_eswitch_del_sp_rules(struct ice_pf *pf) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(&pf->eswitch.reprs, id, repr) { - if (repr->sp_rule.rid) - ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule); - } -} - -/** - * ice_eswitch_add_sp_rule - add adv rule with device's VSI index - * @pf: pointer to PF struct - * @repr: pointer to the repr struct - * - * This function adds advanced rule that forwards packets with - * device's VSI index to the corresponding eswitch ctrl VSI queue. 
- */ -static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr) -{ - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - struct ice_adv_rule_info rule_info = { 0 }; - struct ice_adv_lkup_elem *list; - struct ice_hw *hw = &pf->hw; - const u16 lkups_cnt = 1; - int err; - - list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); - if (!list) - return -ENOMEM; - - ice_rule_add_src_vsi_metadata(list); - - rule_info.sw_act.flag = ICE_FLTR_TX; - rule_info.sw_act.vsi_handle = ctrl_vsi->idx; - rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; - rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + - ctrl_vsi->rxq_map[repr->q_id]; - rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; - rule_info.flags_info.act_valid = true; - rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; - rule_info.src_vsi = repr->src_vsi->idx; - - err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - &repr->sp_rule); - if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d", - repr->id); - - kfree(list); - return err; -} - -static int -ice_eswitch_add_sp_rules(struct ice_pf *pf) -{ - struct ice_repr *repr; - unsigned long id; - int err; - - xa_for_each(&pf->eswitch.reprs, id, repr) { - err = ice_eswitch_add_sp_rule(pf, repr); - if (err) { - ice_eswitch_del_sp_rules(pf); - return err; - } - } - - return 0; -} - /** * ice_eswitch_setup_env - configure eswitch HW filters * @pf: pointer to PF struct @@ -102,7 +23,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct net_device *netdev = uplink_vsi->netdev; struct ice_vsi_vlan_ops *vlan_ops; - bool rule_added = false; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); @@ -112,17 +32,19 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) netif_addr_unlock_bh(netdev); if (ice_vsi_add_vlan_zero(uplink_vsi)) + goto err_vlan_zero; + + if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, + ICE_FLTR_RX)) goto err_def_rx; - if (!ice_is_dflt_vsi_in_use(uplink_vsi->port_info)) { - if (ice_set_dflt_vsi(uplink_vsi)) - goto err_def_rx; - rule_added = true; - } + if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, + ICE_FLTR_TX)) + goto err_def_tx; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); if (vlan_ops->dis_rx_filtering(uplink_vsi)) - goto err_dis_rx; + goto err_vlan_filtering; if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) goto err_override_uplink; @@ -141,10 +63,15 @@ err_override_control: ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); -err_dis_rx: - if (rule_added) - ice_clear_dflt_vsi(uplink_vsi); +err_vlan_filtering: + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_TX); +err_def_tx: + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_RX); err_def_rx: + ice_vsi_del_vlan_zero(uplink_vsi); +err_vlan_zero: ice_fltr_add_mac_and_broadcast(uplink_vsi, uplink_vsi->port_info->mac.perm_addr, ICE_FWD_TO_VSI); @@ -585,7 +512,6 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) static void ice_eswitch_stop_reprs(struct ice_pf *pf) { - ice_eswitch_del_sp_rules(pf); ice_eswitch_stop_all_tx_queues(pf); ice_eswitch_napi_disable(&pf->eswitch.reprs); } @@ -594,7 +520,6 @@ static void ice_eswitch_start_reprs(struct ice_pf *pf) { ice_eswitch_napi_enable(&pf->eswitch.reprs); ice_eswitch_start_all_tx_queues(pf); - ice_eswitch_add_sp_rules(pf); } static void diff --git 
a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index b107e058d538..34823f58cd23 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -16,10 +16,6 @@ struct ice_repr { int q_id; u32 id; u8 parent_mac[ETH_ALEN]; -#ifdef CONFIG_ICE_SWITCHDEV - /* info about slow path rule */ - struct ice_rule_query_data sp_rule; -#endif }; struct ice_repr *ice_repr_add_vf(struct ice_vf *vf); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index f84bab80ca42..ce0b344dda99 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2444,6 +2444,9 @@ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) fi->lan_en = true; } } + + if (fi->flag & ICE_FLTR_TX_ONLY) + fi->lan_en = false; } /** @@ -3819,6 +3822,7 @@ ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set, } else if (f_info.flag & ICE_FLTR_TX) { f_info.src_id = ICE_SRC_ID_VSI; f_info.src = hw_vsi_id; + f_info.flag |= ICE_FLTR_TX_ONLY; } f_list_entry.fltr_info = f_info; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index db7e501b7e0a..1b7dae9ce8d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -8,8 +8,9 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff -#define ICE_FLTR_RX BIT(0) -#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_ONLY BIT(2) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF -- cgit v1.2.3 From 9cba6e1767bf8286563ae8907d6e595c365d3e92 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:10 +0100 Subject: ice: control default Tx rule in lag Tx rule in switchdev was changed to use PF instead of additional control plane VSI. Because of that during lag we should control it. Control means to add and remove the default Tx rule during lag active/inactive switching. It can be done the same way as default Rx rule. 
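Structurally this is the usual add-both / unwind-on-failure arrangement, sketched below with hypothetical names (cfg_one_dflt_rule(), DIR_RX, DIR_TX) standing in for ice_lag_cfg_fltr() and the Rx/Tx direction flags used in the diff that follows:

static int cfg_dflt_rules_sketch(struct ice_lag *lag, bool add)
{
        int err;

        err = cfg_one_dflt_rule(lag, DIR_RX, add);      /* default Rx rule first */
        if (err)
                return err;

        err = cfg_one_dflt_rule(lag, DIR_TX, add);      /* then the default Tx rule */
        if (err)
                cfg_one_dflt_rule(lag, DIR_RX, !add);   /* roll back the Rx change on failure */

        return err;
}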
Reviewed-by: Wojciech Drewek Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 44 +++++++++++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_lag.h | 3 ++- 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 467372d541d2..7b802884440c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -202,11 +202,12 @@ static struct ice_lag *ice_lag_find_primary(struct ice_lag *lag) * @act: rule action * @recipe_id: recipe id for the new rule * @rule_idx: pointer to rule index + * @direction: ICE_FLTR_RX or ICE_FLTR_TX * @add: boolean on whether we are adding filters */ static int ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx, - bool add) + u8 direction, bool add) { struct ice_sw_rule_lkup_rx_tx *s_rule; u16 s_rule_sz, vsi_num; @@ -231,9 +232,16 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx, act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi_num); - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); s_rule->recipe_id = cpu_to_le16(recipe_id); - s_rule->src = cpu_to_le16(hw->port_info->lport); + if (direction == ICE_FLTR_RX) { + s_rule->hdr.type = + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->src = cpu_to_le16(hw->port_info->lport); + } else { + s_rule->hdr.type = + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + s_rule->src = cpu_to_le16(vsi_num); + } s_rule->act = cpu_to_le32(act); s_rule->hdr_len = cpu_to_le16(DUMMY_ETH_HDR_LEN); opc = ice_aqc_opc_add_sw_rules; @@ -266,9 +274,27 @@ ice_lag_cfg_dflt_fltr(struct ice_lag *lag, bool add) { u32 act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE; + int err; + + err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id, + ICE_FLTR_RX, add); + if (err) + goto err_rx; - return ice_lag_cfg_fltr(lag, act, lag->pf_recipe, - &lag->pf_rule_id, add); + act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT | + ICE_SINGLE_ACT_LB_ENABLE; + err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_tx_rule_id, + ICE_FLTR_TX, add); + if (err) + goto err_tx; + + return 0; + +err_tx: + ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id, + ICE_FLTR_RX, !add); +err_rx: + return err; } /** @@ -284,7 +310,7 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add) ICE_SINGLE_ACT_DROP; return ice_lag_cfg_fltr(lag, act, lag->lport_recipe, - &lag->lport_rule_idx, add); + &lag->lport_rule_idx, ICE_FLTR_RX, add); } /** @@ -310,7 +336,7 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) dev = ice_pf_to_dev(lag->pf); /* interface not active - remove old default VSI rule */ - if (bonding_info->slave.state && lag->pf_rule_id) { + if (bonding_info->slave.state && lag->pf_rx_rule_id) { if (ice_lag_cfg_dflt_fltr(lag, false)) dev_err(dev, "Error removing old default VSI filter\n"); if (ice_lag_cfg_drop_fltr(lag, true)) @@ -319,7 +345,7 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) } /* interface becoming active - add new default VSI rule */ - if (!bonding_info->slave.state && !lag->pf_rule_id) { + if (!bonding_info->slave.state && !lag->pf_rx_rule_id) { if (ice_lag_cfg_dflt_fltr(lag, true)) dev_err(dev, "Error adding new default VSI filter\n"); if (lag->lport_rule_idx && ice_lag_cfg_drop_fltr(lag, false)) @@ -2149,7 +2175,7 @@ void 
ice_lag_rebuild(struct ice_pf *pf) ice_lag_cfg_cp_fltr(lag, true); - if (lag->pf_rule_id) + if (lag->pf_rx_rule_id) if (ice_lag_cfg_dflt_fltr(lag, true)) dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 183b38792ef2..bab2c83142a1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -43,7 +43,8 @@ struct ice_lag { u8 primary:1; /* this is primary */ u16 pf_recipe; u16 lport_recipe; - u16 pf_rule_id; + u16 pf_rx_rule_id; + u16 pf_tx_rule_id; u16 cp_rule_idx; u16 lport_rule_idx; u8 role; -- cgit v1.2.3 From 33bf1e86231dbd62f06f0ca3cdf5995eb7d077d5 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:11 +0100 Subject: ice: remove switchdev control plane VSI For slow-path Rx and Tx PF VSI is used. There is no need to have control plane VSI. Remove all code related to it. Eswitch rebuild can't fail without rebuilding control plane VSI. Return void from ice_eswitch_rebuild(). Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_base.c | 36 +---- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 4 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 163 +--------------------- drivers/net/ethernet/intel/ice/ice_eswitch.h | 2 +- drivers/net/ethernet/intel/ice/ice_lag.c | 9 +- drivers/net/ethernet/intel/ice/ice_lib.c | 49 +------ drivers/net/ethernet/intel/ice/ice_main.c | 10 +- drivers/net/ethernet/intel/ice/ice_repr.c | 12 -- drivers/net/ethernet/intel/ice/ice_repr.h | 2 - drivers/net/ethernet/intel/ice/ice_type.h | 1 - drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c | 1 - 12 files changed, 13 insertions(+), 277 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 9bb435b4338f..c4127d5f2be3 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -522,7 +522,6 @@ enum ice_misc_thread_tasks { }; struct ice_eswitch { - struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; struct ice_esw_br_offloads *br_offloads; struct xarray reprs; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index d2fd315556a3..8ffae7fe894f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -263,30 +263,6 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset; } -/** - * ice_eswitch_calc_txq_handle - * @ring: pointer to ring which unique index is needed - * - * To correctly work with many netdevs ring->q_index of Tx rings on switchdev - * VSI can repeat. Hardware ring setup requires unique q_index. Calculate it - * here by finding index in vsi->tx_rings of this ring. - * - * Return ICE_INVAL_Q_INDEX when index wasn't found. Should never happen, - * because VSI is get from ring->vsi, so it has to be present in this VSI. 
- */ -static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring) -{ - const struct ice_vsi *vsi = ring->vsi; - int i; - - ice_for_each_txq(vsi, i) { - if (vsi->tx_rings[i] == ring) - return i; - } - - return ICE_INVAL_Q_INDEX; -} - /** * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring * @ring: The Tx ring to configure @@ -353,9 +329,6 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; - case ICE_VSI_SWITCHDEV_CTRL: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; - break; default: return; } @@ -919,14 +892,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, /* Add unique software queue handle of the Tx queue per * TC into the VSI Tx ring */ - if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ring->q_handle = ice_eswitch_calc_txq_handle(ring); - - if (ring->q_handle == ICE_INVAL_Q_INDEX) - return -ENODEV; - } else { - ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); - } + ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); if (ch) status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 6e20ee610022..ceb17c004d79 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -291,7 +291,6 @@ static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked) switch (vsi->type) { case ICE_VSI_CHNL: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: if (ena) ice_ena_vsi(vsi, locked); @@ -776,8 +775,7 @@ void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) /* no need to proceed with remaining cfg if it is CHNL * or switchdev VSI */ - if (vsi->type == ICE_VSI_CHNL || - vsi->type == ICE_VSI_SWITCHDEV_CTRL) + if (vsi->type == ICE_VSI_CHNL) continue; ice_vsi_map_rings_to_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 50b3de700837..76fa114f22c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -20,7 +20,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) { struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct net_device *netdev = uplink_vsi->netdev; struct ice_vsi_vlan_ops *vlan_ops; @@ -49,17 +48,12 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) goto err_override_uplink; - if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_control; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; return 0; err_override_local_lb: - ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); -err_override_control: ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); @@ -78,61 +72,6 @@ err_vlan_zero: return -ENODEV; } -/** - * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI - * @eswitch: pointer to eswitch struct - * - * In eswitch number of allocated Tx/Rx rings is equal. - * - * This function fills q_vectors structures associated with representor and - * move each ring pairs to port representor netdevs. 
Each port representor - * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to - * number of VFs. - */ -static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch) -{ - struct ice_vsi *vsi = eswitch->control_vsi; - unsigned long repr_id = 0; - int q_id; - - ice_for_each_txq(vsi, q_id) { - struct ice_q_vector *q_vector; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; - struct ice_repr *repr; - - repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, - XA_PRESENT); - if (!repr) - break; - - repr_id += 1; - repr->q_id = q_id; - q_vector = repr->q_vector; - tx_ring = vsi->tx_rings[q_id]; - rx_ring = vsi->rx_rings[q_id]; - - q_vector->vsi = vsi; - q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; - - q_vector->num_ring_tx = 1; - q_vector->tx.tx_ring = tx_ring; - tx_ring->q_vector = q_vector; - tx_ring->next = NULL; - tx_ring->netdev = repr->netdev; - /* In switchdev mode, from OS stack perspective, there is only - * one queue for given netdev, so it needs to be indexed as 0. - */ - tx_ring->q_index = 0; - - q_vector->num_ring_rx = 1; - q_vector->rx.rx_ring = rx_ring; - rx_ring->q_vector = q_vector; - rx_ring->next = NULL; - rx_ring->netdev = repr->netdev; - } -} - /** * ice_eswitch_release_repr - clear PR VSI configuration * @pf: poiner to PF struct @@ -152,8 +91,6 @@ ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) repr->dst = NULL; ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI); - - netif_napi_del(&repr->q_vector->napi); } /** @@ -179,9 +116,6 @@ static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) if (ice_vsi_add_vlan_zero(vsi)) goto err_update_security; - netif_napi_add(repr->netdev, &repr->q_vector->napi, - ice_napi_poll); - netif_keep_dst(uplink_vsi->netdev); dst = repr->dst; @@ -287,13 +221,11 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, static void ice_eswitch_release_env(struct ice_pf *pf) { struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_vsi_vlan_ops *vlan_ops; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_clear_dflt_vsi(uplink_vsi); @@ -302,56 +234,13 @@ static void ice_eswitch_release_env(struct ice_pf *pf) ICE_FWD_TO_VSI); } -/** - * ice_eswitch_vsi_setup - configure eswitch control VSI - * @pf: pointer to PF structure - * @pi: pointer to port_info structure - */ -static struct ice_vsi * -ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) -{ - struct ice_vsi_cfg_params params = {}; - - params.type = ICE_VSI_SWITCHDEV_CTRL; - params.pi = pi; - params.flags = ICE_VSI_FLAG_INIT; - - return ice_vsi_setup(pf, ¶ms); -} - -/** - * ice_eswitch_napi_enable - enable NAPI for all port representors - * @reprs: xarray of reprs - */ -static void ice_eswitch_napi_enable(struct xarray *reprs) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(reprs, id, repr) - napi_enable(&repr->q_vector->napi); -} - -/** - * ice_eswitch_napi_disable - disable NAPI for all port representors - * @reprs: xarray of reprs - */ -static void ice_eswitch_napi_disable(struct xarray *reprs) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(reprs, id, repr) - napi_disable(&repr->q_vector->napi); -} - /** * ice_eswitch_enable_switchdev - configure eswitch in switchdev 
mode * @pf: pointer to PF structure */ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi, *uplink_vsi; + struct ice_vsi *uplink_vsi; uplink_vsi = ice_get_main_vsi(pf); if (!uplink_vsi) @@ -363,15 +252,10 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) return -EINVAL; } - pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); - if (!pf->eswitch.control_vsi) - return -ENODEV; - - ctrl_vsi = pf->eswitch.control_vsi; pf->eswitch.uplink_vsi = uplink_vsi; if (ice_eswitch_setup_env(pf)) - goto err_vsi; + return -ENODEV; if (ice_eswitch_br_offloads_init(pf)) goto err_br_offloads; @@ -382,8 +266,6 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) err_br_offloads: ice_eswitch_release_env(pf); -err_vsi: - ice_vsi_release(ctrl_vsi); return -ENODEV; } @@ -393,11 +275,8 @@ err_vsi: */ static void ice_eswitch_disable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - ice_eswitch_br_offloads_deinit(pf); ice_eswitch_release_env(pf); - ice_vsi_release(ctrl_vsi); pf->eswitch.is_running = false; } @@ -513,40 +392,17 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) static void ice_eswitch_stop_reprs(struct ice_pf *pf) { ice_eswitch_stop_all_tx_queues(pf); - ice_eswitch_napi_disable(&pf->eswitch.reprs); } static void ice_eswitch_start_reprs(struct ice_pf *pf) { - ice_eswitch_napi_enable(&pf->eswitch.reprs); ice_eswitch_start_all_tx_queues(pf); } -static void -ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) -{ - struct ice_vsi *cp = eswitch->control_vsi; - int queues = 0; - - if (queues) { - cp->req_txq = queues; - cp->req_rxq = queues; - ice_vsi_close(cp); - ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT); - ice_vsi_open(cp); - } else if (!change) { - /* change == 0 means that VSI wasn't open, open it here */ - ice_vsi_open(cp); - } - - ice_eswitch_remap_rings_to_vectors(eswitch); -} - int ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) { struct ice_repr *repr; - int change = 1; int err; if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) @@ -556,7 +412,6 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) err = ice_eswitch_enable_switchdev(pf); if (err) return err; - change = 0; } ice_eswitch_stop_reprs(pf); @@ -578,7 +433,6 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) vf->repr_id = repr->id; - ice_eswitch_cp_change_queues(&pf->eswitch, change); ice_eswitch_start_reprs(pf); return 0; @@ -608,8 +462,6 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) if (xa_empty(&pf->eswitch.reprs)) ice_eswitch_disable_switchdev(pf); - else - ice_eswitch_cp_change_queues(&pf->eswitch, -1); ice_eswitch_release_repr(pf, repr); ice_repr_rem_vf(repr); @@ -631,21 +483,14 @@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) * ice_eswitch_rebuild - rebuild eswitch * @pf: pointer to PF structure */ -int ice_eswitch_rebuild(struct ice_pf *pf) +void ice_eswitch_rebuild(struct ice_pf *pf) { struct ice_repr *repr; unsigned long id; - int err; if (!ice_is_switchdev_running(pf)) - return 0; - - err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT); - if (err) - return err; + return; xa_for_each(&pf->eswitch.reprs, id, repr) ice_eswitch_detach(pf, repr->vf); - - return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 59d51c0d14e5..baad68507471 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -10,7 +10,7 
@@ void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf); int ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf); -int ice_eswitch_rebuild(struct ice_pf *pf); +void ice_eswitch_rebuild(struct ice_pf *pf); int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); int diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 7b802884440c..7493eb87d221 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -740,8 +740,7 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_lag_move_single_vf_nodes(lag, oldport, newport, i); } @@ -979,8 +978,7 @@ ice_lag_reclaim_vf_nodes(struct ice_lag *lag, struct ice_hw *src_hw) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_for_each_traffic_class(tc) ice_lag_reclaim_vf_tc(lag, src_hw, i, tc); } @@ -2002,8 +2000,7 @@ ice_lag_move_vf_nodes_sync(struct ice_lag *lag, struct ice_hw *dest_hw) pf = lag->pf; ice_for_each_vsi(pf, i) - if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF || - pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL)) + if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF) ice_for_each_traffic_class(tc) ice_lag_move_vf_nodes_tc_sync(lag, dest_hw, i, tc); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ee3f0d3e3f6d..24b8f394ec4b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -27,8 +27,6 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_CHNL"; case ICE_VSI_LB: return "ICE_VSI_LB"; - case ICE_VSI_SWITCHDEV_CTRL: - return "ICE_VSI_SWITCHDEV_CTRL"; default: return "unknown"; } @@ -144,7 +142,6 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_CTRL: case ICE_VSI_LB: /* a user could change the values of num_[tr]x_desc using @@ -211,21 +208,6 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) max_t(int, vsi->alloc_rxq, vsi->alloc_txq)); break; - case ICE_VSI_SWITCHDEV_CTRL: - /* The number of queues for ctrl VSI is equal to number of PRs - * Each ring is associated to the corresponding VF_PR netdev. 
- * Tx and Rx rings are always equal - */ - if (vsi->req_txq && vsi->req_rxq) { - vsi->alloc_txq = vsi->req_txq; - vsi->alloc_rxq = vsi->req_rxq; - } else { - vsi->alloc_txq = 1; - vsi->alloc_rxq = 1; - } - - vsi->num_q_vectors = 1; - break; case ICE_VSI_VF: if (vf->num_req_qs) vf->num_vf_qs = vf->num_req_qs; @@ -522,22 +504,6 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) return IRQ_HANDLED; } -static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data) -{ - struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - struct ice_pf *pf = q_vector->vsi->back; - struct ice_repr *repr; - unsigned long id; - - if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) - return IRQ_HANDLED; - - xa_for_each(&pf->eswitch.reprs, id, repr) - napi_schedule(&repr->q_vector->napi); - - return IRQ_HANDLED; -} - /** * ice_vsi_alloc_stat_arrays - Allocate statistics arrays * @vsi: VSI pointer @@ -600,10 +566,6 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) } switch (vsi->type) { - case ICE_VSI_SWITCHDEV_CTRL: - /* Setup eswitch MSIX irq handler for VSI */ - vsi->irq_handler = ice_eswitch_msix_clean_rings; - break; case ICE_VSI_PF: /* Setup default MSIX irq handler for VSI */ vsi->irq_handler = ice_msix_clean_rings; @@ -933,11 +895,6 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) max_rss_size); vsi->rss_lut_type = ICE_LUT_PF; break; - case ICE_VSI_SWITCHDEV_CTRL: - vsi->rss_table_size = ICE_LUT_VSI_SIZE; - vsi->rss_size = min_t(u16, num_online_cpus(), max_rss_size); - vsi->rss_lut_type = ICE_LUT_VSI; - break; case ICE_VSI_VF: /* VF VSI will get a small RSS table. * For VSI_LUT, LUT size should be set to 64 bytes. @@ -1263,7 +1220,6 @@ static int ice_vsi_init(struct ice_vsi *vsi, u32 vsi_flags) case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_CHNL: ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; break; @@ -2145,7 +2101,6 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: max_agg_nodes = ICE_MAX_PF_AGG_NODES; agg_node_id_start = ICE_PF_AGG_NODE_ID_START; agg_node_iter = &pf->pf_agg_node[0]; @@ -2317,7 +2272,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) switch (vsi->type) { case ICE_VSI_CTRL: - case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); if (ret) @@ -2750,8 +2704,7 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } else { ice_vsi_close(vsi); } - } else if (vsi->type == ICE_VSI_CTRL || - vsi->type == ICE_VSI_SWITCHDEV_CTRL) { + } else if (vsi->type == ICE_VSI_CTRL) { ice_vsi_close(vsi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 33a164fa325a..f2b7d6ca8805 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7055,13 +7055,11 @@ int ice_down(struct ice_vsi *vsi) WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); - if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); - } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ice_eswitch_stop_all_tx_queues(vsi->back); } ice_vsi_dis_irq(vsi); @@ -7544,11 +7542,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - err = ice_eswitch_rebuild(pf); - if 
(err) { - dev_err(dev, "Switchdev rebuild failed: %d\n", err); - goto err_vsi_rebuild; - } + ice_eswitch_rebuild(pf); if (reset_type == ICE_RESET_PFR) { err = ice_rebuild_channels(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 6191bee6c536..a5c24da31b88 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -291,7 +291,6 @@ static void ice_repr_remove_node(struct devlink_port *devlink_port) */ static void ice_repr_rem(struct ice_repr *repr) { - kfree(repr->q_vector); free_netdev(repr->netdev); kfree(repr); } @@ -331,7 +330,6 @@ static void ice_repr_set_tx_topology(struct ice_pf *pf) static struct ice_repr * ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) { - struct ice_q_vector *q_vector; struct ice_netdev_priv *np; struct ice_repr *repr; int err; @@ -350,20 +348,10 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) np = netdev_priv(repr->netdev); np->repr = repr; - q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) { - err = -ENOMEM; - goto err_alloc_q_vector; - } - repr->q_vector = q_vector; - repr->q_id = repr->id; - ether_addr_copy(repr->parent_mac, parent_mac); return repr; -err_alloc_q_vector: - free_netdev(repr->netdev); err_alloc: kfree(repr); return ERR_PTR(err); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index 34823f58cd23..eb8dec1f7de4 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -9,11 +9,9 @@ struct ice_repr { struct ice_vsi *src_vsi; struct ice_vf *vf; - struct ice_q_vector *q_vector; struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; - int q_id; u32 id; u8 parent_mac[ETH_ALEN]; }; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 9ff92dba5823..64b8e7ec0b63 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -150,7 +150,6 @@ enum ice_vsi_type { ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, - ICE_VSI_SWITCHDEV_CTRL = 7, }; struct ice_link_status { diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c index 4a6c850d83ac..7aae7fdcfcdb 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c @@ -72,7 +72,6 @@ void ice_vsi_init_vlan_ops(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_PF: - case ICE_VSI_SWITCHDEV_CTRL: ice_pf_vsi_init_vlan_ops(vsi); break; case ICE_VSI_VF: -- cgit v1.2.3 From 6235cb6e5b0de0717a1055740f6fd83a28531193 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:12 +0100 Subject: ice: change repr::id values Instead of getting repr::id from xa_alloc() value, set it to the src_vsi::num_vsi value. It is unique for each PR. 
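The practical difference shows up in the xarray usage: xa_alloc() lets the allocator pick the index, while xa_insert() stores the entry under the caller-chosen key, so a later lookup keyed by a VSI number needs no translation. A minimal sketch (the unwind label is hypothetical):

        /* store the representor under its VSI number */
        err = xa_insert(&pf->eswitch.reprs, repr->id, repr, GFP_KERNEL);
        if (err)                /* -EBUSY if the key is taken, -ENOMEM on allocation failure */
                goto err_insert;        /* hypothetical error label */

        /* later, resolve a VSI number straight to its representor */
        repr = xa_load(&pf->eswitch.reprs, vsi_num);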
Reviewed-by: Przemek Kitszel Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 5 ++--- drivers/net/ethernet/intel/ice/ice_repr.c | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 76fa114f22c9..5eba8dec9f94 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -319,7 +319,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); - xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC); + xa_init(&pf->eswitch.reprs); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); break; } @@ -426,8 +426,7 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) if (err) goto err_setup_repr; - err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr, - XA_LIMIT(1, INT_MAX), GFP_KERNEL); + err = xa_insert(&pf->eswitch.reprs, repr->id, repr, GFP_KERNEL); if (err) goto err_xa_alloc; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index a5c24da31b88..b4fb74271811 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -345,6 +345,7 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) } repr->src_vsi = src_vsi; + repr->id = src_vsi->vsi_num; np = netdev_priv(repr->netdev); np->repr = repr; -- cgit v1.2.3 From 44ba608db50970dbb7eae1dca13ebcbf9485b4e7 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:13 +0100 Subject: ice: do switchdev slow-path Rx using PF VSI Add an ICE_RX_FLAG_MULTIDEV flag to Rx ring. If it is set try to find correct port representor. Do it based on src_vsi value stored in flex descriptor. Ids of representor pointers stored in xarray are equal to corresponding src_vsi value. Thanks to that we can directly get correct representor if we have src_vsi value. Set multidev flag during ring configuration. If the mode is switchdev, change the ring descriptor to the one that contains src_vsi value. PF netdev should be reconfigured, do it by calling ice_down() and ice_up() if the netdev was up before configuring switchdev. 
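On the receive side the dispatch then becomes a per-ring branch: rings flagged as multi-netdev resolve the target netdev from the descriptor, all other rings keep using their own netdev. In outline (names taken from the diff below):

        struct net_device *netdev;

        if (unlikely(rx_ring->flags & ICE_RX_FLAGS_MULTIDEV))
                netdev = ice_eswitch_get_target(rx_ring, rx_desc); /* src_vsi -> representor */
        else
                netdev = rx_ring->netdev;                          /* normal fast path */

        skb->protocol = eth_type_trans(skb, netdev);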
Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 8 ++++++ drivers/net/ethernet/intel/ice/ice_eswitch.c | 36 +++++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_eswitch.h | 9 +++++++ drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 9 ++++++- 5 files changed, 62 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 8ffae7fe894f..662fc395edcc 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -452,6 +452,14 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; + /* PF acts as uplink for switchdev; set flex descriptor with src_vsi + * metadata and flags to allow redirecting to PR netdev + */ + if (ice_is_eswitch_mode_switchdev(vsi->back)) { + ring->flags |= ICE_RX_FLAGS_MULTIDEV; + rxdid = ICE_RXDID_FLEX_NIC_2; + } + /* Enable Flexible Descriptors in the queue context which * allows this driver to select a specific receive descriptor format * increasing context priority to pick up profile ID; default is 0x01; diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 5eba8dec9f94..86a6d58ad3ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -21,8 +21,13 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) { struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; struct net_device *netdev = uplink_vsi->netdev; + bool if_running = netif_running(netdev); struct ice_vsi_vlan_ops *vlan_ops; + if (if_running && !test_and_set_bit(ICE_VSI_DOWN, uplink_vsi->state)) + if (ice_down(uplink_vsi)) + return -ENODEV; + ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); netif_addr_lock_bh(netdev); @@ -51,8 +56,13 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; + if (if_running && ice_up(uplink_vsi)) + goto err_up; + return 0; +err_up: + ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); err_override_uplink: @@ -69,6 +79,9 @@ err_vlan_zero: ice_fltr_add_mac_and_broadcast(uplink_vsi, uplink_vsi->port_info->mac.perm_addr, ICE_FWD_TO_VSI); + if (if_running) + ice_up(uplink_vsi); + return -ENODEV; } @@ -493,3 +506,26 @@ void ice_eswitch_rebuild(struct ice_pf *pf) xa_for_each(&pf->eswitch.reprs, id, repr) ice_eswitch_detach(pf, repr->vf); } + +/** + * ice_eswitch_get_target - get netdev based on src_vsi from descriptor + * @rx_ring: ring used to receive the packet + * @rx_desc: descriptor used to get src_vsi value + * + * Get src_vsi value from descriptor and load correct representor. If it isn't + * found return rx_ring->netdev. 
+ */ +struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + struct ice_eswitch *eswitch = &rx_ring->vsi->back->eswitch; + struct ice_32b_rx_flex_desc_nic_2 *desc; + struct ice_repr *repr; + + desc = (struct ice_32b_rx_flex_desc_nic_2 *)rx_desc; + repr = xa_load(&eswitch->reprs, le16_to_cpu(desc->src_vsi)); + if (!repr) + return rx_ring->netdev; + + return repr->netdev; +} diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index baad68507471..e2e5c0c75e7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -26,6 +26,8 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); +struct net_device *ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc); #else /* CONFIG_ICE_SWITCHDEV */ static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } @@ -76,5 +78,12 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { return NETDEV_TX_BUSY; } + +static inline struct net_device * +ice_eswitch_get_target(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + return rx_ring->netdev; +} #endif /* CONFIG_ICE_SWITCHDEV */ #endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index af955b0e5dc5..feba314a3fe4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -365,6 +365,7 @@ struct ice_rx_ring { u8 ptp_rx; #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) +#define ICE_RX_FLAGS_MULTIDEV BIT(3) u8 flags; /* CL5 - 5th cacheline starts here */ struct xdp_rxq_info xdp_rxq; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index f8f1d2bdc1be..676c00e1554c 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -236,7 +236,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, ice_rx_hash_to_skb(rx_ring, rx_desc, skb, ptype); /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + if (unlikely(rx_ring->flags & ICE_RX_FLAGS_MULTIDEV)) { + struct net_device *netdev = ice_eswitch_get_target(rx_ring, + rx_desc); + + skb->protocol = eth_type_trans(skb, netdev); + } else { + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + } ice_rx_csum(rx_ring, skb, rx_desc, ptype); -- cgit v1.2.3 From 4498159a509328e903a692504a0ba1624754bc3e Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 1 Mar 2024 12:54:14 +0100 Subject: ice: count representor stats Removing control plane VSI result in no information about slow-path statistic. In current solution statistics need to be counted in driver. Patch is based on similar implementation done by Simon Horman in nfp: commit eadfa4c3be99 ("nfp: add stats and xmit helpers for representors") Add const modifier to netdev parameter in ice_netdev_to_repr(). It isn't (and shouldn't be) modified in the function. 
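The counting itself follows the standard per-CPU u64_stats pattern: writers bump this-CPU counters between u64_stats_update_begin()/end(), and readers sum every CPU's copy under a fetch_begin/retry loop, for example (Tx counters only, as a sketch):

        for_each_possible_cpu(cpu) {
                const struct ice_repr_pcpu_stats *s = per_cpu_ptr(repr->stats, cpu);
                unsigned int start;
                u64 pkts, bytes;

                do {
                        start = u64_stats_fetch_begin(&s->syncp);
                        pkts = s->tx_packets;
                        bytes = s->tx_bytes;
                } while (u64_stats_fetch_retry(&s->syncp, start));

                stats->tx_packets += pkts;
                stats->tx_bytes += bytes;
        }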
Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 7 +- drivers/net/ethernet/intel/ice/ice_repr.c | 103 +++++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_repr.h | 16 +++- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 2 + 4 files changed, 98 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 86a6d58ad3ec..af4e9530eb48 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -192,13 +192,18 @@ netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ice_repr *repr = ice_netdev_to_repr(netdev); + unsigned int len = skb->len; + int ret; skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); skb->dev = repr->dst->u.port_info.lower_dev; - return dev_queue_xmit(skb); + ret = dev_queue_xmit(skb); + ice_repr_inc_tx_stats(repr, len, ret); + + return ret; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index b4fb74271811..2429727d5562 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -41,6 +41,47 @@ ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) return 0; } +/** + * ice_repr_inc_tx_stats - increment Tx statistic by one packet + * @repr: repr to increment stats on + * @len: length of the packet + * @xmit_status: value returned by xmit function + */ +void ice_repr_inc_tx_stats(struct ice_repr *repr, unsigned int len, + int xmit_status) +{ + struct ice_repr_pcpu_stats *stats; + + if (unlikely(xmit_status != NET_XMIT_SUCCESS && + xmit_status != NET_XMIT_CN)) { + this_cpu_inc(repr->stats->tx_drops); + return; + } + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + +/** + * ice_repr_inc_rx_stats - increment Rx statistic by one packet + * @netdev: repr netdev to increment stats on + * @len: length of the packet + */ +void ice_repr_inc_rx_stats(struct net_device *netdev, unsigned int len) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + struct ice_repr_pcpu_stats *stats; + + stats = this_cpu_ptr(repr->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); +} + /** * ice_repr_get_stats64 - get VF stats for VFPR use * @netdev: pointer to port representor netdev @@ -76,7 +117,7 @@ ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) * ice_netdev_to_repr - Get port representor for given netdevice * @netdev: pointer to port representor netdev */ -struct ice_repr *ice_netdev_to_repr(struct net_device *netdev) +struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); @@ -139,38 +180,35 @@ static int ice_repr_stop(struct net_device *netdev) * ice_repr_sp_stats64 - get slow path stats for port representor * @dev: network interface device structure * @stats: netlink stats structure - * - * RX/TX stats are being swapped here to be consistent with VF stats. 
In slow - * path, port representor receives data when the corresponding VF is sending it - * (and vice versa), TX and RX bytes/packets are effectively swapped on port - * representor. */ static int ice_repr_sp_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct ice_netdev_priv *np = netdev_priv(dev); - int vf_id = np->repr->vf->vf_id; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; - u64 pkts, bytes; - - tx_ring = np->vsi->tx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp, - tx_ring->ring_stats->stats, - &pkts, &bytes); - stats->rx_packets = pkts; - stats->rx_bytes = bytes; - - rx_ring = np->vsi->rx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp, - rx_ring->ring_stats->stats, - &pkts, &bytes); - stats->tx_packets = pkts; - stats->tx_bytes = bytes; - stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed + - rx_ring->ring_stats->rx_stats.alloc_buf_failed; - + struct ice_repr *repr = ice_netdev_to_repr(dev); + int i; + + for_each_possible_cpu(i) { + u64 tbytes, tpkts, tdrops, rbytes, rpkts; + struct ice_repr_pcpu_stats *repr_stats; + unsigned int start; + + repr_stats = per_cpu_ptr(repr->stats, i); + do { + start = u64_stats_fetch_begin(&repr_stats->syncp); + tbytes = repr_stats->tx_bytes; + tpkts = repr_stats->tx_packets; + tdrops = repr_stats->tx_drops; + rbytes = repr_stats->rx_bytes; + rpkts = repr_stats->rx_packets; + } while (u64_stats_fetch_retry(&repr_stats->syncp, start)); + + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; + stats->tx_dropped += tdrops; + stats->rx_bytes += rbytes; + stats->rx_packets += rpkts; + } return 0; } @@ -291,6 +329,7 @@ static void ice_repr_remove_node(struct devlink_port *devlink_port) */ static void ice_repr_rem(struct ice_repr *repr) { + free_percpu(repr->stats); free_netdev(repr->netdev); kfree(repr); } @@ -344,6 +383,12 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) goto err_alloc; } + repr->stats = netdev_alloc_pcpu_stats(struct ice_repr_pcpu_stats); + if (!repr->stats) { + err = -ENOMEM; + goto err_stats; + } + repr->src_vsi = src_vsi; repr->id = src_vsi->vsi_num; np = netdev_priv(repr->netdev); @@ -353,6 +398,8 @@ ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) return repr; +err_stats: + free_netdev(repr->netdev); err_alloc: kfree(repr); return ERR_PTR(err); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index eb8dec1f7de4..cff730b15ca0 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -6,12 +6,22 @@ #include +struct ice_repr_pcpu_stats { + struct u64_stats_sync syncp; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; +}; + struct ice_repr { struct ice_vsi *src_vsi; struct ice_vf *vf; struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; + struct ice_repr_pcpu_stats __percpu *stats; u32 id; u8 parent_mac[ETH_ALEN]; }; @@ -22,8 +32,12 @@ void ice_repr_rem_vf(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); -struct ice_repr *ice_netdev_to_repr(struct net_device *netdev); +struct ice_repr *ice_netdev_to_repr(const struct net_device *netdev); bool ice_is_port_repr_netdev(const struct net_device *netdev); struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi); + +void ice_repr_inc_tx_stats(struct ice_repr *repr, 
unsigned int len, + int xmit_status); +void ice_repr_inc_rx_stats(struct net_device *netdev, unsigned int len); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 676c00e1554c..df072ce767b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -240,6 +240,8 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, struct net_device *netdev = ice_eswitch_get_target(rx_ring, rx_desc); + if (ice_is_port_repr_netdev(netdev)) + ice_repr_inc_rx_stats(netdev, skb->len); skb->protocol = eth_type_trans(skb, netdev); } else { skb->protocol = eth_type_trans(skb, rx_ring->netdev); -- cgit v1.2.3 From ee36b1e93b11b980e0156bd07cbb9866b7d3e29e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2024 17:06:33 -0500 Subject: net: amd8111e: Drop unused copy of pm_cap The copy of pdev->pm_cap in struct amd8111e_priv is never used. Drop it. Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240325220633.1453180-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/amd8111e.c | 1 - drivers/net/ethernet/amd/amd8111e.h | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index ea6cfc2095e1..c1b5e9a94308 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1796,7 +1796,6 @@ static int amd8111e_probe_one(struct pci_dev *pdev, lp = netdev_priv(dev); lp->pci_dev = pdev; lp->amd8111e_net_dev = dev; - lp->pm_cap = pdev->pm_cap; spin_lock_init(&lp->lock); diff --git a/drivers/net/ethernet/amd/amd8111e.h b/drivers/net/ethernet/amd/amd8111e.h index 9d570adb295b..305232f5476d 100644 --- a/drivers/net/ethernet/amd/amd8111e.h +++ b/drivers/net/ethernet/amd/amd8111e.h @@ -764,7 +764,6 @@ struct amd8111e_priv{ u32 ext_phy_id; struct amd8111e_link_config link_config; - int pm_cap; struct net_device *next; int mii; -- cgit v1.2.3 From 2c60c4c008d4b05b9b65bf88f784556208029333 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Mon, 25 Mar 2024 16:34:51 +0100 Subject: ravb: Add support for an optional MDIO mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver used the DT node of the device itself when registering the MDIO bus. While this works, it creates a problem: it forces any MDIO bus properties to also be set on the devices DT node. This mixes the properties of two distinctly different things and is confusing. This change adds support for an optional mdio node to be defined as a child to the device DT node. The child node can then be used to describe MDIO bus properties that the MDIO core can act on when registering the bus. If no mdio child node is found the driver fallback to the old behavior and register the MDIO bus using the device DT node. This change is backward compatible with old bindings in use. 
Signed-off-by: Niklas Söderlund Reviewed-by: Sergey Shtylyov Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240325153451.2366083-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d1be030c8848..9a397f9b4c0c 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2564,6 +2564,7 @@ static int ravb_mdio_init(struct ravb_private *priv) { struct platform_device *pdev = priv->pdev; struct device *dev = &pdev->dev; + struct device_node *mdio_node; struct phy_device *phydev; struct device_node *pn; int error; @@ -2583,7 +2584,13 @@ static int ravb_mdio_init(struct ravb_private *priv) pdev->name, pdev->id); /* Register MDIO bus */ - error = of_mdiobus_register(priv->mii_bus, dev->of_node); + mdio_node = of_get_child_by_name(dev->of_node, "mdio"); + if (!mdio_node) { + /* backwards compatibility for DT lacking mdio subnode */ + mdio_node = of_node_get(dev->of_node); + } + error = of_mdiobus_register(priv->mii_bus, mdio_node); + of_node_put(mdio_node); if (error) goto out_free_bus; -- cgit v1.2.3 From 3bcbc67be1b7e7f6dc48a5b6687a511e5fd7cf81 Mon Sep 17 00:00:00 2001 From: John Fraker Date: Mon, 25 Mar 2024 15:33:08 -0700 Subject: gve: Add counter adminq_get_ptype_map_cnt to stats report This counter counts the number of times get_ptype_map is executed on the admin queue, and was previously missing from the stats report. Signed-off-by: John Fraker Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240325223308.618671-1-jfraker@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 9aebfb843d9d..dbe05402d40b 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -73,7 +73,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt", "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", - "adminq_report_stats_cnt", "adminq_report_link_speed_cnt" + "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt" }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -428,6 +428,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_set_driver_parameter_cnt; data[i++] = priv->adminq_report_stats_cnt; data[i++] = priv->adminq_report_link_speed_cnt; + data[i++] = priv->adminq_get_ptype_map_cnt; } static void gve_get_channels(struct net_device *netdev, -- cgit v1.2.3 From 49d665b8535e5ea5927b896086dcb2eb65514341 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2024 17:49:31 -0500 Subject: qed: Drop useless pci_params.pm_cap qed_init_pci() used pci_params.pm_cap only to cache the pci_dev.pm_cap. Drop the cache and use pci_dev.pm_cap directly. 
Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240325224931.1462051-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed.h | 2 -- drivers/net/ethernet/qlogic/qed/qed_main.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1d719726f72b..b7def3b54937 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -662,8 +662,6 @@ struct qed_hwfn { }; struct pci_params { - int pm_cap; - unsigned long mem_start; unsigned long mem_end; unsigned int irq; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c278f8893042..408da4312e01 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -323,8 +323,7 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev) goto err2; } - cdev->pci_params.pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM); - if (IS_PF(cdev) && !cdev->pci_params.pm_cap) + if (IS_PF(cdev) && !pdev->pm_cap) DP_NOTICE(cdev, "Cannot find power management capability\n"); rc = dma_set_mask_and_coherent(&cdev->pdev->dev, DMA_BIT_MASK(64)); -- cgit v1.2.3 From 6e9b01909a811555ff3326cf80a5847169c57806 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Mar 2024 21:02:12 -0700 Subject: net: remove gfp_mask from napi_alloc_skb() __napi_alloc_skb() is napi_alloc_skb() with the added flexibility of choosing gfp_mask. This is a NAPI function, so GFP_ATOMIC is implied. The only practical choice the caller has is whether to set __GFP_NOWARN. But that's a false choice, too, allocation failures in atomic context will happen, and printing warnings in logs, effectively for a packet drop, is both too much and very likely non-actionable. This leads me to a conclusion that most uses of napi_alloc_skb() are simply misguided, and should use __GFP_NOWARN in the first place. We also have a "standard" way of reporting allocation failures via the queue stat API (qstats::rx-alloc-fail). The direct motivation for this patch is that one of the drivers used at Meta calls napi_alloc_skb() (so prior to this patch without __GFP_NOWARN), and the resulting OOM warning is the top networking warning in our fleet. 
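For drivers being converted, the change is mechanical: drop the gfp argument and rely on the implied GFP_ATOMIC | __GFP_NOWARN. A representative before/after, mirroring the hunks below:

| -       skb = __napi_alloc_skb(&rx_ring->q_vector->napi, size,
| -                              GFP_ATOMIC | __GFP_NOWARN);
| +       skb = napi_alloc_skb(&rx_ring->q_vector->napi, size);

Allocation failures are then expected to surface through the usual rx-alloc-fail style counters rather than log warnings.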
Reviewed-by: Alexander Lobakin Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240327040213.3153864-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/mm/page_frags.rst | 2 +- Documentation/translations/zh_CN/mm/page_frags.rst | 2 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 +--- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 3 +-- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 4 +--- drivers/net/ethernet/intel/ice/ice_txrx.c | 3 +-- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 +-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 5 ++--- drivers/net/ethernet/intel/igc/igc_main.c | 3 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 3 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 ++--- include/linux/skbuff.h | 8 +------- net/core/skbuff.c | 9 ++++----- 13 files changed, 18 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/mm/page_frags.rst b/Documentation/mm/page_frags.rst index a81617e688a8..503ca6cdb804 100644 --- a/Documentation/mm/page_frags.rst +++ b/Documentation/mm/page_frags.rst @@ -25,7 +25,7 @@ to be disabled when executing the fragment allocation. The network stack uses two separate caches per CPU to handle fragment allocation. The netdev_alloc_cache is used by callers making use of the netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is -used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The +used by callers of the __napi_alloc_frag and napi_alloc_skb calls. The main difference between these two calls is the context in which they may be called. The "netdev" prefixed functions are usable in any context as these functions will disable interrupts, while the "napi" prefixed functions are diff --git a/Documentation/translations/zh_CN/mm/page_frags.rst b/Documentation/translations/zh_CN/mm/page_frags.rst index 20bd3fafdc8c..a5b22486a913 100644 --- a/Documentation/translations/zh_CN/mm/page_frags.rst +++ b/Documentation/translations/zh_CN/mm/page_frags.rst @@ -25,7 +25,7 @@ sk_buff->head使用,或者用于skb_shared_info的 “frags” 部分。 网络堆栈在每个CPU使用两个独立的缓存来处理碎片分配。netdev_alloc_cache被使用 netdev_alloc_frag和__netdev_alloc_skb调用的调用者使用。napi_alloc_cache -被调用__napi_alloc_frag和__napi_alloc_skb的调用者使用。这两个调用的主要区别是 +被调用__napi_alloc_frag和napi_alloc_skb的调用者使用。这两个调用的主要区别是 它们可能被调用的环境。“netdev” 前缀的函数可以在任何上下文中使用,因为这些函数 将禁用中断,而 ”napi“ 前缀的函数只可以在softirq上下文中使用。 diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0d7177083708..ac2fcc5ac595 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2144,9 +2144,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, */ /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - I40E_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 11500003af0d..a85b425794df 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -301,8 +301,7 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); if 
(unlikely(!skb)) goto out; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index b71484c87a84..32bb604a1382 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1334,9 +1334,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, net_prefetch(va); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - IAVF_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 97d41d6ebf1f..8bb743f78fcb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1051,8 +1051,7 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) } /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 1857220d27fe..aa81d1162b81 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -555,8 +555,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) } net_prefetch(xdp->data_meta); - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 6dd7a66bb897..f940f650cd78 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3005,8 +3005,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, /* prefetch first cache line of first page */ net_prefetch(va); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, - GFP_ATOMIC); + skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE); if (unlikely(!skb)) { idpf_rx_put_page(rx_buf); @@ -3060,7 +3059,7 @@ static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, struct sk_buff *skb; /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); + skb = napi_alloc_skb(&rxq->q_vector->napi, size); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 35ad40a803cb..0cd923c8ac9b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2712,8 +2712,7 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, net_prefetch(xdp->data_meta); - skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index d34d715c59eb..397cb773fabb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -220,8 +220,7 @@ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, 
net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 24cd80490d19..bcdde68a099a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5109,9 +5109,8 @@ static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch, unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&ch->rxtx_napi, - xdp->data_end - xdp->data_hard_start, - GFP_ATOMIC | __GFP_NOWARN); + skb = napi_alloc_skb(&ch->rxtx_napi, + xdp->data_end - xdp->data_hard_start); if (unlikely(!skb)) return NULL; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 517e546a120a..b7f1ecdaec38 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3350,13 +3350,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz, return __napi_alloc_frag_align(fragsz, -align); } -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask); -static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, - unsigned int length) -{ - return __napi_alloc_skb(napi, length, GFP_ATOMIC); -} +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 17617c29be2d..a1be84be5d35 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -775,10 +775,9 @@ skb_fail: EXPORT_SYMBOL(__netdev_alloc_skb); /** - * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance + * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used @@ -787,9 +786,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb); * * %NULL is returned if there is no free memory. */ -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, - gfp_t gfp_mask) +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) { + gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN; struct napi_alloc_cache *nc; struct sk_buff *skb; bool pfmemalloc; @@ -860,7 +859,7 @@ skb_success: skb_fail: return skb; } -EXPORT_SYMBOL(__napi_alloc_skb); +EXPORT_SYMBOL(napi_alloc_skb); void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize) -- cgit v1.2.3 From c00d33f1fc7958e6e7f461c994fa025aa2273c13 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 15:22:40 +0100 Subject: idpf: make virtchnl2.h self-contained To ease maintaining of virtchnl2.h, which already is messy enough, make it self-contained by adding missing if_ether.h include due to %ETH_ALEN usage. At the same time, virtchnl2_lan_desc.h is not used anywhere in the file, so move this include to idpf_txrx.h to speed up C preprocessing. Acked-by: Kees Cook Acked-by: Gustavo A. R. 
Silva Signed-off-by: Alexander Lobakin Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240327142241.1745989-3-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 2 ++ drivers/net/ethernet/intel/idpf/virtchnl2.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index df76493faa75..3d046b81e507 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -8,6 +8,8 @@ #include #include +#include "virtchnl2_lan_desc.h" + #define IDPF_LARGE_MAX_Q 256 #define IDPF_MAX_Q 16 #define IDPF_MIN_Q 2 diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 4a3c4454d25a..29419211b3d9 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -4,6 +4,8 @@ #ifndef _VIRTCHNL2_H_ #define _VIRTCHNL2_H_ +#include + /* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures, * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion. @@ -17,8 +19,6 @@ * must remain unchanged over time, so we specify explicit values for all enums. */ -#include "virtchnl2_lan_desc.h" - /* This macro is used to generate compilation errors if a structure * is not exactly the correct length. */ -- cgit v1.2.3 From 93d24acfa05ebe954ec1782bca374de2501a5830 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 15:22:41 +0100 Subject: idpf: sprinkle __counted_by{,_le}() in the virtchnl2 header Both virtchnl2.h and its consumer idpf_virtchnl.c are very error-prone. There are 10 structures with flexible arrays at the end, but 9 of them has flex member counter in Little Endian. Make the code a bit more robust by applying __counted_by_le() to those 9. LE platforms is the main target for this driver, so they would receive additional protection. While we're here, add __counted_by() to virtchnl2_ptype::proto_id, as its counter is `u8` regardless of the Endianness. Compile test on x86_64 (LE) didn't reveal any new issues after applying the attributes. Acked-by: Kees Cook Acked-by: Gustavo A. R. 
Silva Signed-off-by: Alexander Lobakin Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240327142241.1745989-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/virtchnl2.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 29419211b3d9..63deb120359c 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -555,7 +555,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk); struct virtchnl2_queue_reg_chunks { __le16 num_chunks; u8 pad[6]; - struct virtchnl2_queue_reg_chunk chunks[]; + struct virtchnl2_queue_reg_chunk chunks[] __counted_by_le(num_chunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks); @@ -703,7 +703,7 @@ struct virtchnl2_config_tx_queues { __le32 vport_id; __le16 num_qinfo; u8 pad[10]; - struct virtchnl2_txq_info qinfo[]; + struct virtchnl2_txq_info qinfo[] __counted_by_le(num_qinfo); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues); @@ -782,7 +782,7 @@ struct virtchnl2_config_rx_queues { __le32 vport_id; __le16 num_qinfo; u8 pad[18]; - struct virtchnl2_rxq_info qinfo[]; + struct virtchnl2_rxq_info qinfo[] __counted_by_le(num_qinfo); }; VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues); @@ -868,7 +868,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk); struct virtchnl2_vector_chunks { __le16 num_vchunks; u8 pad[14]; - struct virtchnl2_vector_chunk vchunks[]; + struct virtchnl2_vector_chunk vchunks[] __counted_by_le(num_vchunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks); @@ -912,7 +912,7 @@ struct virtchnl2_rss_lut { __le16 lut_entries_start; __le16 lut_entries; u8 pad[4]; - __le32 lut[]; + __le32 lut[] __counted_by_le(lut_entries); }; VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut); @@ -977,7 +977,7 @@ struct virtchnl2_ptype { u8 ptype_id_8; u8 proto_id_count; __le16 pad; - __le16 proto_id[]; + __le16 proto_id[] __counted_by(proto_id_count); } __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); @@ -1104,7 +1104,7 @@ struct virtchnl2_rss_key { __le32 vport_id; __le16 key_len; u8 pad; - u8 key_flex[]; + u8 key_flex[] __counted_by_le(key_len); } __packed; VIRTCHNL2_CHECK_STRUCT_LEN(7, virtchnl2_rss_key); @@ -1131,7 +1131,7 @@ VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk); struct virtchnl2_queue_chunks { __le16 num_chunks; u8 pad[6]; - struct virtchnl2_queue_chunk chunks[]; + struct virtchnl2_queue_chunk chunks[] __counted_by_le(num_chunks); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks); @@ -1195,7 +1195,7 @@ struct virtchnl2_queue_vector_maps { __le32 vport_id; __le16 num_qv_maps; u8 pad[10]; - struct virtchnl2_queue_vector qv_maps[]; + struct virtchnl2_queue_vector qv_maps[] __counted_by_le(num_qv_maps); }; VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps); @@ -1247,7 +1247,7 @@ struct virtchnl2_mac_addr_list { __le32 vport_id; __le16 num_mac_addr; u8 pad[2]; - struct virtchnl2_mac_addr mac_addr_list[]; + struct virtchnl2_mac_addr mac_addr_list[] __counted_by_le(num_mac_addr); }; VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list); -- cgit v1.2.3 From 10658e99d952769002c11f7884eebb5a0310d161 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 26 Mar 2024 14:32:09 +0100 Subject: net: stmmac: don't rely on lynx_pcs presence to check for a PHY When initializing attached 
PHYs, there are some cases where we don't expect any PHY to be connected. The logic uses conditions based on various local PCS configuration, but also calls-in phylink_expects_phy() via stmmac_init_phy(), which is enough to ensure we don't try to initialize a PHY when using a Lynx PCS, as long as we have the phy_interface set to a 802.3z mode and are using inband negociation. Drop the lynx check, making the stmmac generic code more pcs_lynx-agnostic. Signed-off-by: Maxime Chevallier [rgantois: commit log] Signed-off-by: Romain Gantois Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240326-rxc_bugfix-v6-3-24a74e5c761f@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bcdde68a099a..9fb8750248a1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3960,8 +3960,7 @@ static int __stmmac_open(struct net_device *dev, if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && (!priv->hw->xpcs || - xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73) && - !priv->hw->lynx_pcs) { + xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) { ret = stmmac_init_phy(dev); if (ret) { netdev_err(priv->dev, -- cgit v1.2.3 From f7bff228a616a7eb6cba9246e97ec4da543aa53a Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Tue, 26 Mar 2024 14:32:10 +0100 Subject: net: stmmac: Support a generic PCS field in mac_device_info Global stmmac support for early initialization of PCS devices requires a generic PCS reference that can be passed to phylink_pcs_pre_init(). Currently, the mac_device_info struct contains only one PCS field, which is specific to the Lynx model. As PCS models are hardware-specific, it is more appropriate to have a generic PCS field in the mac_device_info struct. Refactor the lynx_pcs field into a generic phylink_pcs field. 
Signed-off-by: Romain Gantois Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240326-rxc_bugfix-v6-4-24a74e5c761f@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +---- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index a6fefe675ef1..f55cf09f0783 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -593,7 +593,7 @@ struct mac_device_info { const struct stmmac_mmc_ops *mmc; const struct stmmac_est_ops *est; struct dw_xpcs *xpcs; - struct phylink_pcs *lynx_pcs; /* Lynx external PCS */ + struct phylink_pcs *phylink_pcs; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 68f85e4605cb..12b4a80ea3aa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -479,9 +479,9 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_dvr_remove; } - stpriv->hw->lynx_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); - if (IS_ERR(stpriv->hw->lynx_pcs)) { - ret = PTR_ERR(stpriv->hw->lynx_pcs); + stpriv->hw->phylink_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(stpriv->hw->phylink_pcs)) { + ret = PTR_ERR(stpriv->hw->phylink_pcs); goto err_dvr_remove; } } @@ -498,7 +498,7 @@ static void socfpga_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); - struct phylink_pcs *pcs = priv->hw->lynx_pcs; + struct phylink_pcs *pcs = priv->hw->phylink_pcs; stmmac_pltfr_remove(pdev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9fb8750248a1..1ee06a6e5c22 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -944,10 +944,7 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, if (priv->hw->xpcs) return &priv->hw->xpcs->pcs; - if (priv->hw->lynx_pcs) - return priv->hw->lynx_pcs; - - return NULL; + return priv->hw->phylink_pcs; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, -- cgit v1.2.3 From 58329b03a5957904fa2b33b3824ed19e7b42c9e9 Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Tue, 26 Mar 2024 14:32:11 +0100 Subject: net: stmmac: Signal to PHY/PCS drivers to keep RX clock on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a reocurring issue with stmmac controllers where the MAC fails to initialize its hardware if an RX clock signal isn't provided on the MAC/PHY link. This causes issues when PHY or PCS devices either go into suspend while cutting the RX clock or do not bring the clock signal up early enough for the MAC to initialize successfully. Set the mac_requires_rxc flag in the stmmac phylink config so that PHY/PCS drivers know to keep the RX clock up at all times. 
Reported-by: Clark Wang Link: https://lore.kernel.org/all/20230202081559.3553637-1-xiaoning.wang@nxp.com/ Reported-by: Clément Léger Link: https://lore.kernel.org/linux-arm-kernel/20230116103926.276869-4-clement.leger@bootlin.com/ Co-developed-by: Russell King (Oracle) Signed-off-by: Russell King (Oracle) Signed-off-by: Romain Gantois Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240326-rxc_bugfix-v6-5-24a74e5c761f@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1ee06a6e5c22..fe3498e86de9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1218,6 +1218,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) priv->phylink_config.type = PHYLINK_NETDEV; priv->phylink_config.mac_managed_pm = true; + /* Stmmac always requires an RX clock for hardware initialization */ + priv->phylink_config.mac_requires_rxc = true; + mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) priv->phylink_config.ovr_an_inband = @@ -3408,6 +3411,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) u32 chan; int ret; + /* Make sure RX clock is enabled */ + if (priv->hw->phylink_pcs) + phylink_pcs_pre_init(priv->phylink, priv->hw->phylink_pcs); + /* DMA initialization and SW reset */ ret = stmmac_init_dma_engine(priv); if (ret < 0) { -- cgit v1.2.3 From 7de3c2218eed77ed4771521459a18bef938f7089 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Mar 2024 15:28:51 -0700 Subject: bnxt_en: Add a timeout parameter to bnxt_hwrm_port_ts_query() The caller can pass this new timeout parameter to the function to specify the firmware timeout value when requesting the TX timestamp from the firmware. This will allow the caller to precisely control the timeout and will be used in the next patch. In this patch, the parameter is 0 which means to use the current default value. 
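A rough caller-side sketch (variable name illustrative): passing 0 keeps today's behavior, while a non-zero value bounds the firmware wait in milliseconds and is clamped internally to the firmware maximum:

| u32 tmo_ms = 0;  /* 0 = use the current default timeout */
|
| rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX,
|                              &ts, tmo_ms);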
Cc: Richard Cochran Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20240325222902.220712-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 16 ++++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index cc07660330f5..dbfd1b36774c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -109,7 +109,8 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp) spin_unlock_bh(&ptp->ptp_lock); } -static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) +static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, + u32 txts_tmo) { struct hwrm_port_ts_query_output *resp; struct hwrm_port_ts_query_input *req; @@ -122,10 +123,15 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) req->flags = cpu_to_le32(flags); if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) == PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { + u32 tmo_us = txts_tmo * 1000; + req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); - req->ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT); + if (!tmo_us) + tmo_us = BNXT_PTP_QTS_TIMEOUT; + tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); + req->ts_req_timeout = cpu_to_le16(txts_tmo); } resp = hwrm_req_hold(bp, req); @@ -675,7 +681,8 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) u64 ts = 0, ns = 0; int rc; - rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts); + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts, + 0); if (!rc) { memset(×tamp, 0, sizeof(timestamp)); spin_lock_bh(&ptp->ptp_lock); @@ -891,7 +898,8 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) if (rc) return rc; } else { - rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns); + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, + &ns, 0); if (rc) return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index fce8dc39a7d0..04886d5f22ad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -23,6 +23,7 @@ #define BNXT_HI_TIMER_MASK 0xffff00000000UL #define BNXT_PTP_QTS_TIMEOUT 1000 +#define BNXT_PTP_QTS_MAX_TMO_US 65535 #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \ PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET) -- cgit v1.2.3 From 604041643a858bc1d3926e0a32ebc482e8beaee2 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:28:52 -0700 Subject: bnxt_en: Retry PTP TX timestamp from FW for 1 second Use a new default 1 second timeout value instead of the existing 1 msec value. The driver will keep track of the remaining time before timeout and will pass this value to bnxt_hwrm_port_ts_query(). The firmware supports timeout values up to 65535 usecs. If the timeout value passed to bnxt_hwrm_port_ts_query() is less than the FW max value, we will use that value to precisely control the specified timeout. 
If it is larger than the FW max value, we will use the FW max value and any additional retry to reach the desired timeout will be done in the context of bnxt_ptp_ts_aux_eork(). Link: https://lore.kernel.org/netdev/20240229070202.107488-2-michael.chan@broadcom.com/ Cc: Richard Cochran Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20240325222902.220712-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 16 +++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index dbfd1b36774c..345aac4484ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -678,11 +678,17 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct skb_shared_hwtstamps timestamp; + unsigned long now = jiffies; u64 ts = 0, ns = 0; + u32 tmo = 0; int rc; + if (!ptp->txts_pending) + ptp->abs_txts_tmo = now + msecs_to_jiffies(ptp->txts_tmo); + if (!time_after_eq(now, ptp->abs_txts_tmo)) + tmo = jiffies_to_msecs(ptp->abs_txts_tmo - now); rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts, - 0); + tmo); if (!rc) { memset(×tamp, 0, sizeof(timestamp)); spin_lock_bh(&ptp->ptp_lock); @@ -691,6 +697,10 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, ×tamp); } else { + if (!time_after_eq(jiffies, ptp->abs_txts_tmo)) { + ptp->txts_pending = true; + return; + } netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } @@ -698,6 +708,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) dev_kfree_skb_any(ptp->tx_skb); ptp->tx_skb = NULL; atomic_inc(&ptp->tx_avail); + ptp->txts_pending = false; } static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) @@ -721,6 +732,8 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) spin_unlock_bh(&ptp->ptp_lock); ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; } + if (ptp->txts_pending) + return 0; return HZ; } @@ -973,6 +986,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) spin_unlock_bh(&ptp->ptp_lock); ptp_schedule_worker(ptp->ptp_clock, 0); } + ptp->txts_tmo = BNXT_PTP_DFLT_TX_TMO; return 0; out: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 04886d5f22ad..6a2bba3f9e2d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -22,6 +22,7 @@ #define BNXT_LO_TIMER_MASK 0x0000ffffffffUL #define BNXT_HI_TIMER_MASK 0xffff00000000UL +#define BNXT_PTP_DFLT_TX_TMO 1000 /* ms */ #define BNXT_PTP_QTS_TIMEOUT 1000 #define BNXT_PTP_QTS_MAX_TMO_US 65535 #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ @@ -116,11 +117,14 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; + u8 txts_pending:1; int rx_filter; u32 tstamp_filters; u32 refclk_regs[2]; u32 refclk_mapped_regs[2]; + u32 txts_tmo; + unsigned long abs_txts_tmo; }; #if BITS_PER_LONG == 32 -- cgit v1.2.3 From 1dcd70ba2437b52d18f3ad4cc65931a74bdcb353 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 
25 Mar 2024 15:28:53 -0700 Subject: bnxt_en: Add helper function bnxt_hwrm_vnic_rss_cfg_p5() This is a pure refactoring patch. The new function bnxt_hwrm_vnic_set_rss_p5() will set up the P5_PLUS specific RSS ring table and then call bnxt_hwrm_vnic_cfg() to setup the vnic for proper RSS operations. This new function will be used later for additional RSS contexts. Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++++++++++++++++++----------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 493b724848c8..dd2c5033af02 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9825,6 +9825,23 @@ vnic_setup_err: return rc; } +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, u16 vnic_id) +{ + int rc; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic_id, rc); + return rc; + } + rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); + if (rc) + netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", + vnic_id, rc); + return rc; +} + static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) { int rc, i, nr_ctxs; @@ -9842,18 +9859,10 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) if (i < nr_ctxs) return -ENOMEM; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic_id, rc); - return rc; - } - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + rc = bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic_id); + if (rc) return rc; - } + if (bp->flags & BNXT_FLAG_AGG_RINGS) { rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); if (rc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index dd849e715c9b..fbb53308cb81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2721,6 +2721,7 @@ int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, u16 vnic_id); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); -- cgit v1.2.3 From a4c11166a69619f1fd0118a0790c46872836240a Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:28:54 -0700 Subject: bnxt_en: Refactor VNIC alloc and cfg functions The current VNIC structures are stored in an array bp->vnic_info[]. The index of the array (vnic_id) is passed to all the functions that need to reference the VNIC. This patch changes the scheme to pass the VNIC pointer instead of the vnic index. Subsequent patches will create additional VNICs that will not be stored in the bp->vnic_info[] array. Using the VNIC pointer will work for all the VNICs. 
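The conversion at call sites is mechanical: resolve the array entry once and pass the pointer. A representative before/after:

| -       rc = bnxt_hwrm_vnic_cfg(bp, vnic_id);
| +       vnic = &bp->vnic_info[vnic_id];
| +       rc = bnxt_hwrm_vnic_cfg(bp, vnic);

This keeps existing users working while letting future VNICs that live outside bp->vnic_info[] reuse the same helpers.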
Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 140 +++++++++++++------------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 +- 3 files changed, 75 insertions(+), 72 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dd2c5033af02..3f5d7c81a281 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4241,6 +4241,7 @@ static void bnxt_init_vnics(struct bnxt *bp) int j; vnic->fw_vnic_id = INVALID_HW_RING_ID; + vnic->vnic_id = i; for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) vnic->fw_rss_cos_lb_ctx[j] = INVALID_HW_RING_ID; @@ -5938,9 +5939,9 @@ static void bnxt_hwrm_vnic_update_tunl_tpa(struct bnxt *bp, req->tnl_tpa_en_bitmap = cpu_to_le32(tunl_tpa_bmap); } -static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) +static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input *req; int rc; @@ -6154,9 +6155,9 @@ __bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req, req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr); } -static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct bnxt_vnic_info *vnic, + bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; int rc; @@ -6174,9 +6175,9 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) return hwrm_req_send(bp, req); } -static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, + struct bnxt_vnic_info *vnic, bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; dma_addr_t ring_tbl_map; u32 i, nr_ctxs; @@ -6229,9 +6230,8 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) hwrm_req_drop(bp, req); } -static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) +static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; @@ -6256,7 +6256,8 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) return hwrm_req_send(bp, req); } -static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, +static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_free_input *req; @@ -6265,10 +6266,10 @@ static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, return; req->rss_cos_lb_ctx_id = - cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]); + cpu_to_le16(vnic->fw_rss_cos_lb_ctx[ctx_idx]); hwrm_req_send(bp, req); - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; + vnic->fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; } static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) @@ -6280,13 +6281,14 @@ static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) for (j = 0; j < 
BNXT_MAX_CTX_PER_VNIC; j++) { if (vnic->fw_rss_cos_lb_ctx[j] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, i, j); + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, j); } } bp->rsscos_nr_ctxs = 0; } -static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) +static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp; struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req; @@ -6299,7 +6301,7 @@ static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) resp = hwrm_req_hold(bp, req); rc = hwrm_req_send(bp, req); if (!rc) - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = + vnic->fw_rss_cos_lb_ctx[ctx_idx] = le16_to_cpu(resp->rss_cos_lb_ctx_id); hwrm_req_drop(bp, req); @@ -6313,10 +6315,9 @@ static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp) return VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE; } -int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) { struct bnxt_vnic_info *vnic0 = &bp->vnic_info[BNXT_VNIC_DEFAULT]; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_cfg_input *req; unsigned int ring = 0, grp_idx; u16 def_vlan = 0; @@ -6364,8 +6365,8 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) if (vnic->flags & BNXT_VNIC_RSS_FLAG) ring = 0; else if (vnic->flags & BNXT_VNIC_RFS_FLAG) - ring = vnic_id - 1; - else if ((vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) + ring = vnic->vnic_id - 1; + else if ((vnic->vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) ring = bp->rx_nr_rings - 1; grp_idx = bp->rx_ring[ring].bnapi->index; @@ -6381,25 +6382,25 @@ vnic_mru: #endif if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan) req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE); - if (!vnic_id && bnxt_ulp_registered(bp->edev)) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT && bnxt_ulp_registered(bp->edev)) req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp)); return hwrm_req_send(bp, req); } -static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, u16 vnic_id) +static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic) { - if (bp->vnic_info[vnic_id].fw_vnic_id != INVALID_HW_RING_ID) { + if (vnic->fw_vnic_id != INVALID_HW_RING_ID) { struct hwrm_vnic_free_input *req; if (hwrm_req_init(bp, req, HWRM_VNIC_FREE)) return; - req->vnic_id = - cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id); + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); hwrm_req_send(bp, req); - bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID; + vnic->fw_vnic_id = INVALID_HW_RING_ID; } } @@ -6408,15 +6409,14 @@ static void bnxt_hwrm_vnic_free(struct bnxt *bp) u16 i; for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_free_one(bp, i); + bnxt_hwrm_vnic_free_one(bp, &bp->vnic_info[i]); } -static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, +static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, unsigned int start_rx_ring_idx, unsigned int nr_rings) { unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_alloc_output *resp; struct hwrm_vnic_alloc_input *req; int rc; @@ -6442,7 +6442,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, vnic_no_ring_grps: for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID; - if (vnic_id == BNXT_VNIC_DEFAULT) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT) req->flags = 
cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT); resp = hwrm_req_hold(bp, req); @@ -9676,7 +9676,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { - rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); + rc = bnxt_hwrm_vnic_set_tpa(bp, &bp->vnic_info[i], tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", i, rc); @@ -9691,7 +9691,7 @@ static void bnxt_hwrm_clear_vnic_rss(struct bnxt *bp) int i; for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_set_rss(bp, i, false); + bnxt_hwrm_vnic_set_rss(bp, &bp->vnic_info[i], false); } static void bnxt_clear_vnic(struct bnxt *bp) @@ -9769,28 +9769,27 @@ static int bnxt_hwrm_set_cache_line_size(struct bnxt *bp, int size) return hwrm_req_send(bp, req); } -static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int __bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; int rc; if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) goto skip_rss_ctx; /* allocate context for vnic */ - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 0); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 0); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++; if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 1); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 1); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cos ctx alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++; @@ -9798,26 +9797,26 @@ static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) skip_rss_ctx: /* configure default vnic, ring grp */ - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } /* Enable RSS hashing on vnic */ - rc = bnxt_hwrm_vnic_set_rss(bp, vnic_id, true); + rc = bnxt_hwrm_vnic_set_rss(bp, vnic, true); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } } @@ -9825,33 +9824,33 @@ vnic_setup_err: return rc; } -int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true); + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic_id, rc); + vnic->vnic_id, rc); return rc; } - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); return rc; } -static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) +static int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc, i, nr_ctxs; nr_ctxs = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings); for (i = 0; i < nr_ctxs; i++) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, i); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, i); if (rc) { netdev_err(bp->dev, "hwrm vnic %d ctx %d alloc failure rc: %x\n", - vnic_id, i, rc); + 
vnic->vnic_id, i, rc); break; } bp->rsscos_nr_ctxs++; @@ -9859,55 +9858,57 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) if (i < nr_ctxs) return -ENOMEM; - rc = bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic_id); + rc = bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic); if (rc) return rc; if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } } return rc; } -static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - return __bnxt_setup_vnic_p5(bp, vnic_id); + return __bnxt_setup_vnic_p5(bp, vnic); else - return __bnxt_setup_vnic(bp, vnic_id); + return __bnxt_setup_vnic(bp, vnic); } -static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, u16 vnic_id, +static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 start_rx_ring_idx, int rx_rings) { int rc; - rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, start_rx_ring_idx, rx_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, start_rx_ring_idx, rx_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); return rc; } - return bnxt_setup_vnic(bp, vnic_id); + return bnxt_setup_vnic(bp, vnic); } static int bnxt_alloc_rfs_vnics(struct bnxt *bp) { + struct bnxt_vnic_info *vnic; int i, rc = 0; - if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return bnxt_alloc_and_setup_vnic(bp, BNXT_VNIC_NTUPLE, 0, - bp->rx_nr_rings); + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { + vnic = &bp->vnic_info[BNXT_VNIC_NTUPLE]; + return bnxt_alloc_and_setup_vnic(bp, vnic, 0, bp->rx_nr_rings); + } if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; for (i = 0; i < bp->rx_nr_rings; i++) { - struct bnxt_vnic_info *vnic; u16 vnic_id = i + 1; u16 ring_id = i; @@ -9918,7 +9919,7 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) vnic->flags |= BNXT_VNIC_RFS_FLAG; if (bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP) vnic->flags |= BNXT_VNIC_RFS_NEW_RSS_FLAG; - if (bnxt_alloc_and_setup_vnic(bp, vnic_id, ring_id, 1)) + if (bnxt_alloc_and_setup_vnic(bp, &bp->vnic_info[vnic_id], ring_id, 1)) break; } return rc; @@ -9936,16 +9937,17 @@ static bool bnxt_promisc_ok(struct bnxt *bp) static int bnxt_setup_nitroa0_vnic(struct bnxt *bp) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[1]; unsigned int rc = 0; - rc = bnxt_hwrm_vnic_alloc(bp, 1, bp->rx_nr_rings - 1, 1); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, bp->rx_nr_rings - 1, 1); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); return rc; } - rc = bnxt_hwrm_vnic_cfg(bp, 1); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); @@ -9988,7 +9990,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) rx_nr_rings--; /* default vnic 0 */ - rc = bnxt_hwrm_vnic_alloc(bp, BNXT_VNIC_DEFAULT, 0, rx_nr_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, rx_nr_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic alloc failure rc: %x\n", rc); goto err_out; @@ -9997,7 +9999,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (BNXT_VF(bp)) bnxt_hwrm_func_qcfg(bp); - rc = bnxt_setup_vnic(bp, BNXT_VNIC_DEFAULT); + rc = bnxt_setup_vnic(bp, vnic); if (rc) goto err_out; if (bp->rss_cap & BNXT_RSS_CAP_RSS_HASH_TYPE_DELTA) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index fbb53308cb81..81460a96c0dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1256,6 +1256,7 @@ struct bnxt_vnic_info { #define BNXT_VNIC_UCAST_FLAG 8 #define BNXT_VNIC_RFS_NEW_RSS_FLAG 0x10 #define BNXT_VNIC_NTUPLE_FLAG 0x20 + u32 vnic_id; }; struct bnxt_hw_rings { @@ -2695,7 +2696,7 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); -int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id); +int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); @@ -2721,7 +2722,7 @@ int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); -int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, u16 vnic_id); +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 93f9bd55020f..86dcd2c76587 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -71,7 +71,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, rcu_assign_pointer(ulp->ulp_ops, ulp_ops); if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, 0); + bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); bnxt_fill_msix_vecs(bp, bp->edev->msix_entries); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; -- cgit v1.2.3 From fea41bd766342a734e0562945254b65c22c6c3b4 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:28:55 -0700 Subject: bnxt_en: Introduce rss ctx structure, alloc/free functions Add struct bnxt_rss_ctx, related storage lists, required defines, and its alloc/free functions. Later patches will use them in order to support multiple RSS contexts. 
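
The context IDs themselves come from a small bitmap with bit 0 pre-set, so
ID 0 can never be handed out to a user-created context. A stand-alone sketch
of that allocation scheme (illustrative only -- the driver manages
bp->rss_ctx_bmap with bitmap_zalloc()/__set_bit()/clear_bit() here, and a
later patch in this series adds the bitmap_find_free_region() lookup when a
context is created):

#include <stdio.h>
#include <stdint.h>

#define MAX_CTX   32                  /* mirrors BNXT_MAX_ETH_RSS_CTX */
#define BMAP_BITS (MAX_CTX + 1)

static uint64_t ctx_bmap = 1;         /* bit 0 burned: context 0 is reserved */

static int ctx_alloc(void)
{
        int id;

        for (id = 1; id < BMAP_BITS; id++) {
                if (!(ctx_bmap & (1ULL << id))) {
                        ctx_bmap |= 1ULL << id;
                        return id;
                }
        }
        return -1;                    /* out of contexts */
}

static void ctx_free(int id)
{
        ctx_bmap &= ~(1ULL << id);
}

int main(void)
{
        int a = ctx_alloc(), b = ctx_alloc();

        printf("got %d and %d\n", a, b);      /* 1 and 2 */
        ctx_free(a);
        printf("reused %d\n", ctx_alloc());   /* 1 again */
        return 0;
}
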
Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 +++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 24 ++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 + 3 files changed, 80 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3f5d7c81a281..0ede267904ad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9925,6 +9925,53 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) return rc; } +void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + bool all) +{ + if (!all) + return; + + list_del(&rss_ctx->list); + bp->num_rss_ctx--; + clear_bit(rss_ctx->index, bp->rss_ctx_bmap); + kfree(rss_ctx); +} + +struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp) +{ + struct bnxt_rss_ctx *rss_ctx = NULL; + + rss_ctx = kzalloc(sizeof(*rss_ctx), GFP_KERNEL); + if (rss_ctx) { + rss_ctx->vnic.rss_ctx = rss_ctx; + list_add_tail(&rss_ctx->list, &bp->rss_ctx_list); + bp->num_rss_ctx++; + } + return rss_ctx; +} + +void bnxt_clear_rss_ctxs(struct bnxt *bp, bool all) +{ + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) + bnxt_del_one_rss_ctx(bp, rss_ctx, all); + + if (all) + bitmap_free(bp->rss_ctx_bmap); +} + +static void bnxt_init_multi_rss_ctx(struct bnxt *bp) +{ + bp->rss_ctx_bmap = bitmap_zalloc(BNXT_RSS_CTX_BMAP_LEN, GFP_KERNEL); + if (bp->rss_ctx_bmap) { + /* burn index 0 since we cannot have context 0 */ + __set_bit(0, bp->rss_ctx_bmap); + INIT_LIST_HEAD(&bp->rss_ctx_list); + bp->rss_cap |= BNXT_RSS_CAP_MULTI_RSS_CTX; + } +} + /* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */ static bool bnxt_promisc_ok(struct bnxt *bp) { @@ -14612,6 +14659,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) unregister_netdev(dev); bnxt_free_l2_filters(bp, true); bnxt_free_ntp_fltrs(bp, true); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ cancel_work_sync(&bp->sp_task); @@ -15223,6 +15272,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_LIST_HEAD(&bp->usr_fltr_list); + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) + bnxt_init_multi_rss_ctx(bp); + rc = register_netdev(dev); if (rc) goto init_err_cleanup; @@ -15243,6 +15295,8 @@ init_err_dl: bnxt_clear_int_mode(bp); init_err_pci_clean: + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); bnxt_hwrm_func_drv_unrgtr(bp); bnxt_free_hwrm_resources(bp); bnxt_hwmon_uninit(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 81460a96c0dd..4d3104c26cfa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1256,9 +1256,21 @@ struct bnxt_vnic_info { #define BNXT_VNIC_UCAST_FLAG 8 #define BNXT_VNIC_RFS_NEW_RSS_FLAG 0x10 #define BNXT_VNIC_NTUPLE_FLAG 0x20 +#define BNXT_VNIC_RSSCTX_FLAG 0x40 + struct bnxt_rss_ctx *rss_ctx; u32 vnic_id; }; +struct bnxt_rss_ctx { + struct list_head list; + struct bnxt_vnic_info vnic; + u16 *rss_indir_tbl; + u8 index; +}; + +#define BNXT_MAX_ETH_RSS_CTX 32 +#define BNXT_RSS_CTX_BMAP_LEN (BNXT_MAX_ETH_RSS_CTX + 1) + 
struct bnxt_hw_rings { int tx; int rx; @@ -2228,6 +2240,9 @@ struct bnxt { /* grp_info indexed by completion ring index */ struct bnxt_ring_grp_info *grp_info; struct bnxt_vnic_info *vnic_info; + struct list_head rss_ctx_list; + unsigned long *rss_ctx_bmap; + u32 num_rss_ctx; int nr_vnics; u16 *rss_indir_tbl; u16 rss_indir_tbl_entries; @@ -2242,6 +2257,7 @@ struct bnxt { #define BNXT_RSS_CAP_AH_V6_RSS_CAP BIT(5) #define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6) #define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7) +#define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8) u8 rss_hash_key[HW_HASH_KEY_SIZE]; u8 rss_hash_key_valid:1; @@ -2341,6 +2357,10 @@ struct bnxt { #define BNXT_SUPPORTS_NTUPLE_VNIC(bp) \ (BNXT_PF(bp) && ((bp)->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3)) +#define BNXT_SUPPORTS_MULTI_RSS_CTX(bp) \ + (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ + ((bp)->rss_cap & BNXT_RSS_CAP_MULTI_RSS_CTX)) + u32 hwrm_spec_code; u16 hwrm_cmd_seq; u16 hwrm_cmd_kong_seq; @@ -2723,6 +2743,10 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); +void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + bool all); +struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp); +void bnxt_clear_rss_ctxs(struct bnxt *bp, bool all); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1d240a27455a..771833b1900d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -969,6 +969,8 @@ static int bnxt_set_channels(struct net_device *dev, } bnxt_clear_usr_fltrs(bp, true); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, false); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's -- cgit v1.2.3 From ecb342bb6098fa13f2719f8baaab4475e404f784 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:28:56 -0700 Subject: bnxt_en: Refactor RSS indir alloc/set functions We will need to dynamically allocate and change indirection tables for additional RSS contexts. Add the rss_ctx pointer parameter to bnxt_alloc_rss_indir_tbl() and bnxt_set_dflt_rss_indir_tbl(). Existing usage will always pass rss_ctx as NULL which means the default RSS context. When supporting additional RSS contexts in subsequent patches, we'll pass the valid rss_ctx to these 2 functions. 
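
The default fill itself does not change: every visible slot is spread
round-robin across the RX rings (ethtool_rxfh_indir_default() is simply
index % n_rx_rings) and any trailing pad entries are zeroed; only the
destination table now depends on whether an rss_ctx was passed in. A
compilable user-space sketch of that layout, with made-up sizes, purely for
illustration:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define TBL_ENTRIES 16        /* stands in for bp->rss_indir_tbl_entries */
#define VISIBLE     12        /* stands in for bnxt_get_rxfh_indir_size() */
#define RX_RINGS     5

int main(void)
{
        uint16_t tbl[TBL_ENTRIES];
        int i;

        for (i = 0; i < VISIBLE; i++)
                tbl[i] = i % RX_RINGS;  /* ethtool_rxfh_indir_default() */
        memset(&tbl[i], 0, (TBL_ENTRIES - VISIBLE) * sizeof(tbl[0]));

        for (i = 0; i < TBL_ENTRIES; i++)
                printf("%u ", tbl[i]);
        printf("\n");
        return 0;
}
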
Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++++++++++++---------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0ede267904ad..80ccb5a54dae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6026,9 +6026,10 @@ static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr) return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct); } -static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) +int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { int entries; + u16 *tbl; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5; @@ -6036,16 +6037,22 @@ static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) entries = HW_HASH_INDEX_SIZE; bp->rss_indir_tbl_entries = entries; - bp->rss_indir_tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), - GFP_KERNEL); - if (!bp->rss_indir_tbl) + tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), GFP_KERNEL); + if (!tbl) return -ENOMEM; + + if (rss_ctx) + rss_ctx->rss_indir_tbl = tbl; + else + bp->rss_indir_tbl = tbl; + return 0; } -static void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp) +void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { u16 max_rings, max_entries, pad, i; + u16 *rss_indir_tbl; if (!bp->rx_nr_rings) return; @@ -6056,13 +6063,17 @@ static void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp) max_rings = bp->rx_nr_rings; max_entries = bnxt_get_rxfh_indir_size(bp->dev); + if (rss_ctx) + rss_indir_tbl = &rss_ctx->rss_indir_tbl[0]; + else + rss_indir_tbl = &bp->rss_indir_tbl[0]; for (i = 0; i < max_entries; i++) - bp->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings); + rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings); pad = bp->rss_indir_tbl_entries - max_entries; if (pad) - memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); + memset(&rss_indir_tbl[i], 0, pad * sizeof(u16)); } static u16 bnxt_get_max_rss_ring(struct bnxt *bp) @@ -7341,7 +7352,7 @@ static void bnxt_check_rss_tbl_no_rmgr(struct bnxt *bp) if (hw_resc->resv_rx_rings != bp->rx_nr_rings) { hw_resc->resv_rx_rings = bp->rx_nr_rings; if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); } } @@ -7497,7 +7508,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) return -ENOMEM; if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); return rc; } @@ -15119,7 +15130,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bp->flags |= BNXT_FLAG_CHIP_P7; } - rc = bnxt_alloc_rss_indir_tbl(bp); + rc = bnxt_alloc_rss_indir_tbl(bp, NULL); if (rc) goto init_err_pci_clean; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4d3104c26cfa..181758f6892a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2715,6 +2715,8 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); void 
bnxt_fill_ipv6_mask(__be32 mask[4]); +int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); +void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); -- cgit v1.2.3 From b09353437b28ff8786e60aa9bd560a4474facfc0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Mar 2024 15:28:57 -0700 Subject: bnxt_en: Simplify bnxt_rfs_capable() bnxt_rfs_capable() determines the number of VNICs and RSS_CTXs required to support aRFS and then reserves the resources. We already have functions bnxt_get_total_vnics() and bnxt_get_total_rss_ctxs() to do that. Simplify the code by calling these functions. It is also more correct to do the resource reservation after bnxt_can_reserve_rings() returns true. Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-8-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 80ccb5a54dae..e968684ccb1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12479,28 +12479,19 @@ static bool bnxt_rfs_capable(struct bnxt *bp) struct bnxt_hw_rings hwr = {0}; int max_vnics, max_rss_ctxs; - hwr.rss_ctx = 1; - if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { - /* 2 VNICS: default + Ntuple */ - hwr.vnic = 2; - hwr.rss_ctx = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) * - hwr.vnic; - goto check_reserve_vnic; - } - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && + !BNXT_SUPPORTS_NTUPLE_VNIC(bp)) return bnxt_rfs_supported(bp); + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; - hwr.vnic = 1 + bp->rx_nr_rings; -check_reserve_vnic: + hwr.grp = bp->rx_nr_rings; + hwr.vnic = bnxt_get_total_vnics(bp, bp->rx_nr_rings); + hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); max_vnics = bnxt_get_max_func_vnics(bp); max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - !(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP)) - hwr.rss_ctx = hwr.vnic; - if (hwr.vnic > max_vnics || hwr.rss_ctx > max_rss_ctxs) { if (bp->rx_nr_rings > 1) netdev_warn(bp->dev, -- cgit v1.2.3 From 0895926f725ac6e4d163eb4b964199a5e81fbf40 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:28:58 -0700 Subject: bnxt_en: Add a new_rss_ctx parameter to bnxt_rfs_capable() Modify bnxt_rfs_capable() to check that there are enough resources to support aRFS/ntuple filters for a new RSS context requested by the user. Existing use cases in the driver will always set the new parameter to false. 
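
The headroom test is plain arithmetic on the resource counts: with
ntuple-capable firmware the VNIC demand is the default VNIC plus the ntuple
VNIC plus one per existing RSS context, and a request for a new context must
fit one more VNIC (and its RSS contexts) under the firmware maximums. A
simplified stand-alone illustration of the idea -- not the driver code, which
also accounts for ring groups and the RSS context limit:

#include <stdio.h>
#include <stdbool.h>

static bool rfs_capable(unsigned int num_rss_ctx, unsigned int max_vnics,
                        bool new_rss_ctx)
{
        unsigned int vnic = 2 + num_rss_ctx;    /* default + ntuple + contexts */

        if (new_rss_ctx)
                vnic++;
        return vnic <= max_vnics;
}

int main(void)
{
        printf("%d\n", rfs_capable(30, 32, true));      /* 0: one too many */
        printf("%d\n", rfs_capable(29, 32, true));      /* 1: still fits */
        return 0;
}
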
Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-9-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e968684ccb1d..c5f3b97d258d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7360,7 +7360,7 @@ static int bnxt_get_total_vnics(struct bnxt *bp, int rx_rings) { if (bp->flags & BNXT_FLAG_RFS) { if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return 2; + return 2 + bp->num_rss_ctx; if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return rx_rings + 1; } @@ -12474,7 +12474,7 @@ static bool bnxt_rfs_supported(struct bnxt *bp) } /* If runtime conditions support RFS */ -static bool bnxt_rfs_capable(struct bnxt *bp) +bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) { struct bnxt_hw_rings hwr = {0}; int max_vnics, max_rss_ctxs; @@ -12488,6 +12488,8 @@ static bool bnxt_rfs_capable(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.vnic = bnxt_get_total_vnics(bp, bp->rx_nr_rings); + if (new_rss_ctx) + hwr.vnic++; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); max_vnics = bnxt_get_max_func_vnics(bp); max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp); @@ -12525,7 +12527,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); netdev_features_t vlan_features; - if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp)) + if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false)) features &= ~NETIF_F_NTUPLE; if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) @@ -13661,7 +13663,7 @@ static void bnxt_set_dflt_rfs(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_RFS; if (bnxt_rfs_supported(bp)) { dev->hw_features |= NETIF_F_NTUPLE; - if (bnxt_rfs_capable(bp)) { + if (bnxt_rfs_capable(bp, false)) { bp->flags |= BNXT_FLAG_RFS; dev->features |= NETIF_F_NTUPLE; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 181758f6892a..d5fbb63a0e0c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2756,6 +2756,7 @@ void bnxt_reenable_sriov(struct bnxt *bp); void bnxt_close_nic(struct bnxt *, bool, bool); void bnxt_get_ring_err_stats(struct bnxt *bp, struct bnxt_total_ring_err_stats *stats); +bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, u32 *reg_buf); void bnxt_fw_exception(struct bnxt *bp); -- cgit v1.2.3 From 77a614f7499edd47549f31730f50cb043b699a8b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Mar 2024 15:28:59 -0700 Subject: bnxt_en: Refactor bnxt_set_rxfh() Add a new bnxt_modify_rss() function to modify the RSS key and RSS indirection table. The new function can modify the parameters for the default context or additional contexts. 
Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-10-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 40 +++++++++++++++-------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 771833b1900d..7f57198f5834 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1778,6 +1778,32 @@ static int bnxt_get_rxfh(struct net_device *dev, return 0; } +static void bnxt_modify_rss(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + struct ethtool_rxfh_param *rxfh) +{ + if (rxfh->key) { + if (rss_ctx) { + memcpy(rss_ctx->vnic.rss_hash_key, rxfh->key, + HW_HASH_KEY_SIZE); + } else { + memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); + bp->rss_hash_key_updated = true; + } + } + if (rxfh->indir) { + u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + u16 *indir_tbl = bp->rss_indir_tbl; + + if (rss_ctx) + indir_tbl = rss_ctx->rss_indir_tbl; + for (i = 0; i < tbl_size; i++) + indir_tbl[i] = rxfh->indir[i]; + pad = bp->rss_indir_tbl_entries - tbl_size; + if (pad) + memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); + } +} + static int bnxt_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) @@ -1788,20 +1814,8 @@ static int bnxt_set_rxfh(struct net_device *dev, if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->key) { - memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); - bp->rss_hash_key_updated = true; - } - - if (rxfh->indir) { - u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev); + bnxt_modify_rss(bp, NULL, rxfh); - for (i = 0; i < tbl_size; i++) - bp->rss_indir_tbl[i] = rxfh->indir[i]; - pad = bp->rss_indir_tbl_entries - tbl_size; - if (pad) - memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); - } bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); -- cgit v1.2.3 From b3d0083caf9abd495e1fc0016b0b29bbfeee99af Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:29:00 -0700 Subject: bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh() Support up to 32 RSS contexts per device if supported by the device. 
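
With this in place an additional context can be created, queried,
reconfigured and deleted entirely through the standard ethtool RSS-context
interface; typical usage (exact syntax depends on the installed ethtool
version) is "ethtool -X eth0 context new equal 4" to allocate a context
spread over the first four rings, "ethtool -x eth0 context 1" to read it
back, and "ethtool -X eth0 context 1 delete" to free it again. Internally
each context carries its own VNIC, RSS table DMA block and indirection
table, and bnxt_hwrm_realloc_rss_ctx_vnic() recreates them on open so that
user-created contexts survive an ifdown/ifup cycle.
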
Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-11-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 49 ++++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 148 +++++++++++++++++++++- 3 files changed, 196 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c5f3b97d258d..8aa3db2ceece 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5939,8 +5939,8 @@ static void bnxt_hwrm_vnic_update_tunl_tpa(struct bnxt *bp, req->tnl_tpa_en_bitmap = cpu_to_le32(tunl_tpa_bmap); } -static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, - u32 tpa_flags) +int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags) { u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input *req; @@ -6129,6 +6129,8 @@ static void bnxt_fill_hw_rss_tbl_p5(struct bnxt *bp, if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) j = ethtool_rxfh_indir_default(i, bp->rx_nr_rings); + else if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + j = vnic->rss_ctx->rss_indir_tbl[i]; else j = bp->rss_indir_tbl[i]; rxr = &bp->rx_ring[j]; @@ -6423,9 +6425,9 @@ static void bnxt_hwrm_vnic_free(struct bnxt *bp) bnxt_hwrm_vnic_free_one(bp, &bp->vnic_info[i]); } -static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, - unsigned int start_rx_ring_idx, - unsigned int nr_rings) +int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, + unsigned int start_rx_ring_idx, + unsigned int nr_rings) { unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings; struct hwrm_vnic_alloc_output *resp; @@ -9852,7 +9854,7 @@ int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) return rc; } -static int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) +int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc, i, nr_ctxs; @@ -9939,15 +9941,46 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + int i; + + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); + } if (!all) return; + if (vnic->rss_table) + dma_free_coherent(&bp->pdev->dev, vnic->rss_table_size, + vnic->rss_table, + vnic->rss_table_dma_addr); + kfree(rss_ctx->rss_indir_tbl); list_del(&rss_ctx->list); bp->num_rss_ctx--; clear_bit(rss_ctx->index, bp->rss_ctx_bmap); kfree(rss_ctx); } +static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) +{ + bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + if (bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings) || + bnxt_hwrm_vnic_set_tpa(bp, vnic, set_tpa) || + __bnxt_setup_vnic_p5(bp, vnic)) { + netdev_err(bp->dev, "Failed to restore RSS ctx %d\n", + rss_ctx->index); + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + } + } +} + struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp) { struct bnxt_rss_ctx *rss_ctx = NULL; @@ 
-11829,6 +11862,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) bnxt_vf_reps_open(bp); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_hwrm_realloc_rss_ctx_vnic(bp); bnxt_cfg_usr_fltrs(bp); return 0; @@ -11977,6 +12012,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, while (bnxt_drv_busy(bp)) msleep(20); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, false); /* Flush rings and disable interrupts */ bnxt_shutdown_nic(bp, irq_re_init); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d5fbb63a0e0c..37a850959315 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1270,6 +1270,7 @@ struct bnxt_rss_ctx { #define BNXT_MAX_ETH_RSS_CTX 32 #define BNXT_RSS_CTX_BMAP_LEN (BNXT_MAX_ETH_RSS_CTX + 1) +#define BNXT_VNIC_ID_INVALID 0xffffffff struct bnxt_hw_rings { int tx; @@ -2714,11 +2715,16 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); +int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags); void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); +int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, + unsigned int start_rx_ring_idx, + unsigned int nr_rings); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); @@ -2745,6 +2751,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); +int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7f57198f5834..4dbe80b11dda 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1207,6 +1207,36 @@ fltr_err: return rc; } +static struct bnxt_rss_ctx *bnxt_get_rss_ctx_from_index(struct bnxt *bp, + u32 index) +{ + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) + if (rss_ctx->index == index) + return rss_ctx; + return NULL; +} + +static int bnxt_alloc_rss_ctx_rss_table(struct bnxt *bp, + struct bnxt_rss_ctx *rss_ctx) +{ + int size = L1_CACHE_ALIGN(BNXT_MAX_RSS_TABLE_SIZE_P5); + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + vnic->rss_table_size = size + HW_HASH_KEY_SIZE; + vnic->rss_table = dma_alloc_coherent(&bp->pdev->dev, + vnic->rss_table_size, + &vnic->rss_table_dma_addr, + GFP_KERNEL); + if (!vnic->rss_table) + return -ENOMEM; + + vnic->rss_hash_key = ((void *)vnic->rss_table) + size; + vnic->rss_hash_key_dma_addr = vnic->rss_table_dma_addr + size; + return 0; +} + static int bnxt_add_l2_cls_rule(struct bnxt 
*bp, struct ethtool_rx_flow_spec *fs) { @@ -1756,7 +1786,10 @@ static u32 bnxt_get_rxfh_key_size(struct net_device *dev) static int bnxt_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh) { + u32 rss_context = rxfh->rss_context; + struct bnxt_rss_ctx *rss_ctx = NULL; struct bnxt *bp = netdev_priv(dev); + u16 *indir_tbl = bp->rss_indir_tbl; struct bnxt_vnic_info *vnic; u32 i, tbl_size; @@ -1766,10 +1799,18 @@ static int bnxt_get_rxfh(struct net_device *dev, return 0; vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT]; - if (rxfh->indir && bp->rss_indir_tbl) { + if (rxfh->rss_context) { + rss_ctx = bnxt_get_rss_ctx_from_index(bp, rss_context); + if (!rss_ctx) + return -EINVAL; + indir_tbl = rss_ctx->rss_indir_tbl; + vnic = &rss_ctx->vnic; + } + + if (rxfh->indir && indir_tbl) { tbl_size = bnxt_get_rxfh_indir_size(dev); for (i = 0; i < tbl_size; i++) - rxfh->indir[i] = bp->rss_indir_tbl[i]; + rxfh->indir[i] = indir_tbl[i]; } if (rxfh->key && vnic->rss_hash_key) @@ -1804,6 +1845,105 @@ static void bnxt_modify_rss(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, } } +static int bnxt_set_rxfh_context(struct bnxt *bp, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + u32 *rss_context = &rxfh->rss_context; + struct bnxt_rss_ctx *rss_ctx; + struct bnxt_vnic_info *vnic; + bool modify = false; + int bit_id; + int rc; + + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { + NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); + return -EOPNOTSUPP; + } + + if (*rss_context != ETH_RXFH_CONTEXT_ALLOC) { + rss_ctx = bnxt_get_rss_ctx_from_index(bp, *rss_context); + if (!rss_ctx) { + NL_SET_ERR_MSG_FMT_MOD(extack, "RSS context %u not found", + *rss_context); + return -EINVAL; + } + if (*rss_context && rxfh->rss_delete) { + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + return 0; + } + modify = true; + vnic = &rss_ctx->vnic; + goto modify_context; + } + + if (bp->num_rss_ctx >= BNXT_MAX_ETH_RSS_CTX) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Out of RSS contexts, maximum %u", + BNXT_MAX_ETH_RSS_CTX); + return -EINVAL; + } + + if (!bnxt_rfs_capable(bp, true)) { + NL_SET_ERR_MSG_MOD(extack, "Out hardware resources"); + return -ENOMEM; + } + + rss_ctx = bnxt_alloc_rss_ctx(bp); + if (!rss_ctx) + return -ENOMEM; + + vnic = &rss_ctx->vnic; + vnic->flags |= BNXT_VNIC_RSSCTX_FLAG; + vnic->vnic_id = BNXT_VNIC_ID_INVALID; + rc = bnxt_alloc_rss_ctx_rss_table(bp, rss_ctx); + if (rc) + goto out; + + rc = bnxt_alloc_rss_indir_tbl(bp, rss_ctx); + if (rc) + goto out; + + bnxt_set_dflt_rss_indir_tbl(bp, rss_ctx); + memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + + rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate VNIC"); + goto out; + } + + rc = bnxt_hwrm_vnic_set_tpa(bp, vnic, bp->flags & BNXT_FLAG_TPA); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to setup TPA"); + goto out; + } +modify_context: + bnxt_modify_rss(bp, rss_ctx, rxfh); + + if (modify) + return bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic); + + rc = __bnxt_setup_vnic_p5(bp, vnic); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Unable to setup TPA"); + goto out; + } + + bit_id = bitmap_find_free_region(bp->rss_ctx_bmap, + BNXT_RSS_CTX_BMAP_LEN, 0); + if (bit_id < 0) { + rc = -ENOMEM; + goto out; + } + rss_ctx->index = (u16)bit_id; + *rss_context = rss_ctx->index; + + return 0; +out: + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + return rc; +} + static int bnxt_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) @@ -1814,6 +1954,9 @@ 
static int bnxt_set_rxfh(struct net_device *dev, if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + if (rxfh->rss_context) + return bnxt_set_rxfh_context(bp, rxfh, extack); + bnxt_modify_rss(bp, NULL, rxfh); bnxt_clear_usr_fltrs(bp, false); @@ -5087,6 +5230,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, + .cap_rss_ctx_supported = 1, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS_IRQ | -- cgit v1.2.3 From 61c814bf4ad7c4422796e53e9489965e9257abac Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:29:01 -0700 Subject: bnxt_en: Refactor bnxt_cfg_rfs_ring_tbl_idx() Refactor bnxt_cfg_rfs_ring_tbl_idx() to pass in the filter structure pointer instead of the RX ring number. This will allow an ntuple filter to be set up for the non-default RSS contexts in the next patch. Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-12-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8aa3db2ceece..88d4116cfd79 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5789,8 +5789,10 @@ void bnxt_fill_ipv6_mask(__be32 mask[4]) static void bnxt_cfg_rfs_ring_tbl_idx(struct bnxt *bp, struct hwrm_cfa_ntuple_filter_alloc_input *req, - u16 rxq) + struct bnxt_ntuple_filter *fltr) { + u16 rxq = fltr->base.rxq; + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { struct bnxt_vnic_info *vnic; u32 enables; @@ -5831,7 +5833,7 @@ int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, req->flags = cpu_to_le32(CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP); } else if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) { - bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr->base.rxq); + bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr); } else { vnic = &bp->vnic_info[fltr->base.rxq + 1]; req->dst_id = cpu_to_le16(vnic->fw_vnic_id); -- cgit v1.2.3 From 2f4f9fe5bf5fe0eff7e3f372353b30e600e33c20 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 25 Mar 2024 15:29:02 -0700 Subject: bnxt_en: Support adding ntuple rules on RSS contexts When the user wants to add an ntuple filter to an RSS context, select the appropriate VNIC belonging to the selected RSS context and add the VNIC destination rule. Make the necessary changes to bnxt_add_ntuple_cls_rule(). 
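
In ethtool terms this is the FLOW_RSS case: the ntuple rule names an RSS
context rather than a single destination ring, the filter is flagged
BNXT_ACT_RSS_CTX with fw_vnic_id set to the context index, and
bnxt_cfg_rfs_ring_tbl_idx() then points the firmware filter at that
context's VNIC. Typical usage (again depending on ethtool version support)
would be something like "ethtool -N eth0 flow-type tcp4 dst-port 443
context 1", steering matching flows into whichever rings RSS context 1
spreads over; deleting a context also removes any user filters that still
reference it.
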
Reviewed-by: Kalesh AP Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240325222902.220712-13-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 33 ++++++++++++++++------- 3 files changed, 51 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 88d4116cfd79..388e80bf91f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5791,8 +5791,20 @@ bnxt_cfg_rfs_ring_tbl_idx(struct bnxt *bp, struct hwrm_cfa_ntuple_filter_alloc_input *req, struct bnxt_ntuple_filter *fltr) { + struct bnxt_rss_ctx *rss_ctx, *tmp; u16 rxq = fltr->base.rxq; + if (fltr->base.flags & BNXT_ACT_RSS_CTX) { + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + if (rss_ctx->index == fltr->base.fw_vnic_id) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + req->dst_id = cpu_to_le16(vnic->fw_vnic_id); + break; + } + } + return; + } if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { struct bnxt_vnic_info *vnic; u32 enables; @@ -9944,6 +9956,8 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all) { struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + struct bnxt_filter_base *usr_fltr, *tmp; + struct bnxt_ntuple_filter *ntp_fltr; int i; bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); @@ -9954,6 +9968,18 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, if (!all) return; + list_for_each_entry_safe(usr_fltr, tmp, &bp->usr_fltr_list, list) { + if ((usr_fltr->flags & BNXT_ACT_RSS_CTX) && + usr_fltr->fw_vnic_id == rss_ctx->index) { + ntp_fltr = container_of(usr_fltr, + struct bnxt_ntuple_filter, + base); + bnxt_hwrm_cfa_ntuple_filter_free(bp, ntp_fltr); + bnxt_del_ntp_filter(bp, ntp_fltr); + bnxt_del_one_usr_fltr(bp, usr_fltr); + } + } + if (vnic->rss_table) dma_free_coherent(&bp->pdev->dev, vnic->rss_table_size, vnic->rss_table, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 37a850959315..0640fcb57ef8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1374,6 +1374,7 @@ struct bnxt_filter_base { #define BNXT_ACT_RING_DST 2 #define BNXT_ACT_FUNC_DST 4 #define BNXT_ACT_NO_AGING 8 +#define BNXT_ACT_RSS_CTX 0x10 u16 sw_id; u16 rxq; u16 fw_vnic_id; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4dbe80b11dda..9c49f629d565 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1312,22 +1312,24 @@ static bool bnxt_verify_ntuple_ip6_flow(struct ethtool_usrip6_spec *ip_spec, } static int bnxt_add_ntuple_cls_rule(struct bnxt *bp, - struct ethtool_rx_flow_spec *fs) + struct ethtool_rxnfc *cmd) { - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + struct ethtool_rx_flow_spec *fs = &cmd->fs; struct bnxt_ntuple_filter *new_fltr, *fltr; + u32 flow_type = fs->flow_type & 0xff; struct bnxt_l2_filter *l2_fltr; struct bnxt_flow_masks *fmasks; - u32 flow_type = fs->flow_type; struct flow_keys *fkeys; - u32 idx; + u32 idx, ring; int rc; + u8 vf; if (!bp->vnic_info) return -EAGAIN; - if ((flow_type & 
(FLOW_MAC_EXT | FLOW_EXT)) || vf) + vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + if ((fs->flow_type & (FLOW_MAC_EXT | FLOW_EXT)) || vf) return -EOPNOTSUPP; if (flow_type == IP_USER_FLOW) { @@ -1435,6 +1437,19 @@ static int bnxt_add_ntuple_cls_rule(struct bnxt *bp, rcu_read_unlock(); new_fltr->base.flags = BNXT_ACT_NO_AGING; + if (fs->flow_type & FLOW_RSS) { + struct bnxt_rss_ctx *rss_ctx; + + new_fltr->base.fw_vnic_id = 0; + new_fltr->base.flags |= BNXT_ACT_RSS_CTX; + rss_ctx = bnxt_get_rss_ctx_from_index(bp, cmd->rss_context); + if (rss_ctx) { + new_fltr->base.fw_vnic_id = rss_ctx->index; + } else { + rc = -EINVAL; + goto ntuple_err; + } + } if (fs->ring_cookie == RX_CLS_FLOW_DISC) new_fltr->base.flags |= BNXT_ACT_DROP; else @@ -1476,12 +1491,12 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd) flow_type == IPV6_USER_FLOW) && !(bp->fw_cap & BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO)) return -EOPNOTSUPP; - if (flow_type & (FLOW_MAC_EXT | FLOW_RSS)) + if (flow_type & FLOW_MAC_EXT) return -EINVAL; flow_type &= ~FLOW_EXT; if (fs->ring_cookie == RX_CLS_FLOW_DISC && flow_type != ETHER_FLOW) - return bnxt_add_ntuple_cls_rule(bp, fs); + return bnxt_add_ntuple_cls_rule(bp, cmd); ring = ethtool_get_flow_spec_ring(fs->ring_cookie); vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); @@ -1495,7 +1510,7 @@ static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd) if (flow_type == ETHER_FLOW) rc = bnxt_add_l2_cls_rule(bp, fs); else - rc = bnxt_add_ntuple_cls_rule(bp, fs); + rc = bnxt_add_ntuple_cls_rule(bp, cmd); return rc; } -- cgit v1.2.3 From 47220a1e0b701d144ef20131f31566cf0815095f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 5 Mar 2024 18:50:21 -0800 Subject: igb: simplify pci ops declaration The igb driver was pre-declaring tons of functions just so that it could have an early declaration of the pci_driver struct. Delete a bunch of the declarations and move the struct to the bottom of the file, after all the functions are declared. 
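
The remaining forward references (such as igb_init_module() registering the
driver) are satisfied by the bare "static struct pci_driver igb_driver;"
line added here, which in C is a tentative definition: earlier code may take
the object's address while the fully initialized definition follows later in
the same translation unit. A minimal stand-alone illustration of that
pattern (nothing below is driver code):

#include <stdio.h>

struct ops {
        void (*hello)(void);
};

static struct ops my_ops;               /* tentative definition, completed below */

static void use_early(void)
{
        my_ops.hello();                 /* fine: the object is already declared */
}

static void say_hello(void)
{
        puts("hello");
}

static struct ops my_ops = {            /* the real, initialized definition */
        .hello = say_hello,
};

int main(void)
{
        use_early();
        return 0;
}
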
Reviewed-by: Alan Brady Signed-off-by: Jesse Brandeburg Reviewed-by: Maciej Fijalkowski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 53 ++++++++++++++----------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a3f100769e39..89604c9d8e49 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -106,8 +106,6 @@ static int igb_setup_all_rx_resources(struct igb_adapter *); static void igb_free_all_tx_resources(struct igb_adapter *); static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); -static int igb_probe(struct pci_dev *, const struct pci_device_id *); -static void igb_remove(struct pci_dev *pdev); static void igb_init_queue_configuration(struct igb_adapter *adapter); static int igb_sw_init(struct igb_adapter *); int igb_open(struct net_device *); @@ -178,20 +176,6 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf); static int igb_disable_sriov(struct pci_dev *dev, bool reinit); #endif -static int igb_suspend(struct device *); -static int igb_resume(struct device *); -static int igb_runtime_suspend(struct device *dev); -static int igb_runtime_resume(struct device *dev); -static int igb_runtime_idle(struct device *dev); -#ifdef CONFIG_PM -static const struct dev_pm_ops igb_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) - SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, - igb_runtime_idle) -}; -#endif -static void igb_shutdown(struct pci_dev *); -static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); #ifdef CONFIG_IGB_DCA static int igb_notify_dca(struct notifier_block *, unsigned long, void *); static struct notifier_block dca_notifier = { @@ -219,19 +203,6 @@ static const struct pci_error_handlers igb_err_handler = { static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); -static struct pci_driver igb_driver = { - .name = igb_driver_name, - .id_table = igb_pci_tbl, - .probe = igb_probe, - .remove = igb_remove, -#ifdef CONFIG_PM - .driver.pm = &igb_pm_ops, -#endif - .shutdown = igb_shutdown, - .sriov_configure = igb_pci_sriov_configure, - .err_handler = &igb_err_handler -}; - MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); MODULE_LICENSE("GPL v2"); @@ -647,6 +618,8 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) return adapter->netdev; } +static struct pci_driver igb_driver; + /** * igb_init_module - Driver Registration Routine * @@ -10157,4 +10130,26 @@ static void igb_nfc_filter_restore(struct igb_adapter *adapter) spin_unlock(&adapter->nfc_lock); } + +#ifdef CONFIG_PM +static const struct dev_pm_ops igb_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) + SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, + igb_runtime_idle) +}; +#endif + +static struct pci_driver igb_driver = { + .name = igb_driver_name, + .id_table = igb_pci_tbl, + .probe = igb_probe, + .remove = igb_remove, +#ifdef CONFIG_PM + .driver.pm = &igb_pm_ops, +#endif + .shutdown = igb_shutdown, + .sriov_configure = igb_pci_sriov_configure, + .err_handler = &igb_err_handler +}; + /* igb_main.c */ -- cgit v1.2.3 From 75a3f93b53832449c2c58a527a3865394cc656ba Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 5 Mar 2024 18:50:22 
-0800 Subject: net: intel: implement modern PM ops declarations Switch the Intel networking drivers to use the new power management ops declaration formats and macros, which allows us to drop __maybe_unused, as well as a bunch of ifdef checking CONFIG_PM. This is safe to do because the compiler drops the unused functions, verified by checking for any of the power management function symbols being present in System.map for a build without CONFIG_PM. If a driver has runtime PM, define the ops with pm_ptr(), and if the driver has Simple PM, use pm_sleep_ptr(), as well as the new versions of the macros for declaring the members of the pm_ops structs. Checked with network-enabled allnoconfig, allyesconfig, allmodconfig on x64_64. Reviewed-by: Alan Brady Signed-off-by: Jesse Brandeburg Reviewed-by: Maciej Fijalkowski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e100.c | 8 +++---- drivers/net/ethernet/intel/e1000/e1000_main.c | 14 ++++++------ drivers/net/ethernet/intel/e1000e/netdev.c | 22 ++++++++----------- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 10 ++++----- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++----- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 +++---- drivers/net/ethernet/intel/ice/ice_main.c | 12 ++++------- drivers/net/ethernet/intel/igb/igb_main.c | 26 +++++++++-------------- drivers/net/ethernet/intel/igbvf/netdev.c | 6 +++--- drivers/net/ethernet/intel/igc/igc_main.c | 24 +++++++-------------- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 +++---- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 +++---- 12 files changed, 64 insertions(+), 92 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 3fcb8daaa243..9b068d40778d 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -3037,7 +3037,7 @@ static int __e100_power_off(struct pci_dev *pdev, bool wake) return 0; } -static int __maybe_unused e100_suspend(struct device *dev_d) +static int e100_suspend(struct device *dev_d) { bool wake; @@ -3046,7 +3046,7 @@ static int __maybe_unused e100_suspend(struct device *dev_d) return 0; } -static int __maybe_unused e100_resume(struct device *dev_d) +static int e100_resume(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct nic *nic = netdev_priv(netdev); @@ -3163,7 +3163,7 @@ static const struct pci_error_handlers e100_err_handler = { .resume = e100_io_resume, }; -static SIMPLE_DEV_PM_OPS(e100_pm_ops, e100_suspend, e100_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(e100_pm_ops, e100_suspend, e100_resume); static struct pci_driver e100_driver = { .name = DRV_NAME, @@ -3172,7 +3172,7 @@ static struct pci_driver e100_driver = { .remove = e100_remove, /* Power Management hooks */ - .driver.pm = &e100_pm_ops, + .driver.pm = pm_sleep_ptr(&e100_pm_ops), .shutdown = e100_shutdown, .err_handler = &e100_err_handler, diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1d1e93686af2..5b43f9b194fc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -149,8 +149,8 @@ static int e1000_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); static void e1000_restore_vlan(struct e1000_adapter *adapter); -static int __maybe_unused e1000_suspend(struct device *dev); -static int __maybe_unused e1000_resume(struct device *dev); 
+static int e1000_suspend(struct device *dev); +static int e1000_resume(struct device *dev); static void e1000_shutdown(struct pci_dev *pdev); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -175,16 +175,14 @@ static const struct pci_error_handlers e1000_err_handler = { .resume = e1000_io_resume, }; -static SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume); static struct pci_driver e1000_driver = { .name = e1000_driver_name, .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, - .driver = { - .pm = &e1000_pm_ops, - }, + .driver.pm = pm_sleep_ptr(&e1000_pm_ops), .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; @@ -5135,7 +5133,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) return 0; } -static int __maybe_unused e1000_suspend(struct device *dev) +static int e1000_suspend(struct device *dev) { int retval; struct pci_dev *pdev = to_pci_dev(dev); @@ -5147,7 +5145,7 @@ static int __maybe_unused e1000_suspend(struct device *dev) return retval; } -static int __maybe_unused e1000_resume(struct device *dev) +static int e1000_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index cc8c531ec3df..1c91dece75a8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6950,13 +6950,13 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } -static __maybe_unused int e1000e_pm_prepare(struct device *dev) +static int e1000e_pm_prepare(struct device *dev) { return pm_runtime_suspended(dev) && pm_suspend_via_firmware(); } -static __maybe_unused int e1000e_pm_suspend(struct device *dev) +static int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6979,7 +6979,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) return rc; } -static __maybe_unused int e1000e_pm_resume(struct device *dev) +static int e1000e_pm_resume(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -7013,7 +7013,7 @@ static __maybe_unused int e1000e_pm_runtime_idle(struct device *dev) return -EBUSY; } -static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev) +static int e1000e_pm_runtime_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7032,7 +7032,7 @@ static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev) return rc; } -static __maybe_unused int e1000e_pm_runtime_suspend(struct device *dev) +static int e1000e_pm_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7919,8 +7919,7 @@ static const struct pci_device_id e1000_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -static const struct dev_pm_ops e1000_pm_ops = { -#ifdef CONFIG_PM_SLEEP +static const struct dev_pm_ops e1000e_pm_ops = { .prepare = e1000e_pm_prepare, .suspend = e1000e_pm_suspend, .resume = e1000e_pm_resume, @@ -7928,9 +7927,8 @@ static const struct dev_pm_ops e1000_pm_ops = { .thaw = e1000e_pm_thaw, .poweroff = e1000e_pm_suspend, .restore = e1000e_pm_resume, -#endif - 
SET_RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume, - e1000e_pm_runtime_idle) + RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume, + e1000e_pm_runtime_idle) }; /* PCI Device API Driver */ @@ -7939,9 +7937,7 @@ static struct pci_driver e1000_driver = { .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, - .driver = { - .pm = &e1000_pm_ops, - }, + .driver.pm = pm_ptr(&e1000e_pm_ops), .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index d748b98274e7..92de609b7218 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2342,7 +2342,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) * suspend or hibernation. This function does not need to handle lower PCIe * device state as the stack takes care of that for us. **/ -static int __maybe_unused fm10k_resume(struct device *dev) +static int fm10k_resume(struct device *dev) { struct fm10k_intfc *interface = dev_get_drvdata(dev); struct net_device *netdev = interface->netdev; @@ -2369,7 +2369,7 @@ static int __maybe_unused fm10k_resume(struct device *dev) * system suspend or hibernation. This function does not need to handle lower * PCIe device state as the stack takes care of that for us. **/ -static int __maybe_unused fm10k_suspend(struct device *dev) +static int fm10k_suspend(struct device *dev) { struct fm10k_intfc *interface = dev_get_drvdata(dev); struct net_device *netdev = interface->netdev; @@ -2502,16 +2502,14 @@ static const struct pci_error_handlers fm10k_err_handler = { .reset_done = fm10k_io_reset_done, }; -static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); static struct pci_driver fm10k_driver = { .name = fm10k_driver_name, .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, - .driver = { - .pm = &fm10k_pm_ops, - }, + .driver.pm = pm_sleep_ptr(&fm10k_pm_ops), .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f86578857e8a..b297f44d635f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16509,7 +16509,7 @@ static void i40e_shutdown(struct pci_dev *pdev) * i40e_suspend - PM callback for moving to D3 * @dev: generic device information structure **/ -static int __maybe_unused i40e_suspend(struct device *dev) +static int i40e_suspend(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); struct i40e_hw *hw = &pf->hw; @@ -16560,7 +16560,7 @@ static int __maybe_unused i40e_suspend(struct device *dev) * i40e_resume - PM callback for waking up from D3 * @dev: generic device information structure **/ -static int __maybe_unused i40e_resume(struct device *dev) +static int i40e_resume(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); int err; @@ -16606,16 +16606,14 @@ static const struct pci_error_handlers i40e_err_handler = { .resume = i40e_pci_error_resume, }; -static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, - .driver = { - .pm = &i40e_pm_ops, - 
}, + .driver.pm = pm_sleep_ptr(&i40e_pm_ops), .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ef2440f3abf8..13361a780ece 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5023,7 +5023,7 @@ err_dma: * * Called when the system (VM) is entering sleep/suspend. **/ -static int __maybe_unused iavf_suspend(struct device *dev_d) +static int iavf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct iavf_adapter *adapter = netdev_priv(netdev); @@ -5051,7 +5051,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) * * Called when the system (VM) is resumed from sleep/suspend. **/ -static int __maybe_unused iavf_resume(struct device *dev_d) +static int iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct iavf_adapter *adapter; @@ -5238,14 +5238,14 @@ static void iavf_shutdown(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D3hot); } -static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); static struct pci_driver iavf_driver = { .name = iavf_driver_name, .id_table = iavf_pci_tbl, .probe = iavf_probe, .remove = iavf_remove, - .driver.pm = &iavf_pm_ops, + .driver.pm = pm_sleep_ptr(&iavf_pm_ops), .shutdown = iavf_shutdown, }; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f2b7d6ca8805..3dea0d4c767c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5321,7 +5321,6 @@ static void ice_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PM /** * ice_prepare_for_shutdown - prep for PCI shutdown * @pf: board private structure @@ -5410,7 +5409,7 @@ err_reinit: * Power Management callback to quiesce the device and prepare * for D3 transition. 
*/ -static int __maybe_unused ice_suspend(struct device *dev) +static int ice_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct ice_pf *pf; @@ -5477,7 +5476,7 @@ static int __maybe_unused ice_suspend(struct device *dev) * ice_resume - PM callback for waking up from D3 * @dev: generic device information structure */ -static int __maybe_unused ice_resume(struct device *dev) +static int ice_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); enum ice_reset_req reset_type; @@ -5528,7 +5527,6 @@ static int __maybe_unused ice_resume(struct device *dev) return 0; } -#endif /* CONFIG_PM */ /** * ice_pci_err_detected - warning that PCI error has been detected @@ -5702,7 +5700,7 @@ static const struct pci_device_id ice_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, ice_pci_tbl); -static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); static const struct pci_error_handlers ice_pci_err_handler = { .error_detected = ice_pci_err_detected, @@ -5717,9 +5715,7 @@ static struct pci_driver ice_driver = { .id_table = ice_pci_tbl, .probe = ice_probe, .remove = ice_remove, -#ifdef CONFIG_PM - .driver.pm = &ice_pm_ops, -#endif /* CONFIG_PM */ + .driver.pm = pm_sleep_ptr(&ice_pm_ops), .shutdown = ice_shutdown, .sriov_configure = ice_sriov_configure, .sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 89604c9d8e49..74a998fcaa6f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9426,12 +9426,12 @@ static void igb_deliver_wake_packet(struct net_device *netdev) netif_rx(skb); } -static int __maybe_unused igb_suspend(struct device *dev) +static int igb_suspend(struct device *dev) { return __igb_shutdown(to_pci_dev(dev), NULL, 0); } -static int __maybe_unused __igb_resume(struct device *dev, bool rpm) +static int __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -9487,12 +9487,12 @@ static int __maybe_unused __igb_resume(struct device *dev, bool rpm) return err; } -static int __maybe_unused igb_resume(struct device *dev) +static int igb_resume(struct device *dev) { return __igb_resume(dev, false); } -static int __maybe_unused igb_runtime_idle(struct device *dev) +static int igb_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -9503,12 +9503,12 @@ static int __maybe_unused igb_runtime_idle(struct device *dev) return -EBUSY; } -static int __maybe_unused igb_runtime_suspend(struct device *dev) +static int igb_runtime_suspend(struct device *dev) { return __igb_shutdown(to_pci_dev(dev), NULL, 1); } -static int __maybe_unused igb_runtime_resume(struct device *dev) +static int igb_runtime_resume(struct device *dev) { return __igb_resume(dev, true); } @@ -10131,22 +10131,16 @@ static void igb_nfc_filter_restore(struct igb_adapter *adapter) spin_unlock(&adapter->nfc_lock); } -#ifdef CONFIG_PM -static const struct dev_pm_ops igb_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) - SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, - igb_runtime_idle) -}; -#endif +static _DEFINE_DEV_PM_OPS(igb_pm_ops, igb_suspend, igb_resume, + igb_runtime_suspend, igb_runtime_resume, + igb_runtime_idle); static struct pci_driver igb_driver = { .name = 
igb_driver_name, .id_table = igb_pci_tbl, .probe = igb_probe, .remove = igb_remove, -#ifdef CONFIG_PM - .driver.pm = &igb_pm_ops, -#endif + .driver.pm = pm_ptr(&igb_pm_ops), .shutdown = igb_shutdown, .sriov_configure = igb_pci_sriov_configure, .err_handler = &igb_err_handler diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b0cf310e6f7b..40ccd24ffc53 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2470,7 +2470,7 @@ static int igbvf_suspend(struct device *dev_d) return 0; } -static int __maybe_unused igbvf_resume(struct device *dev_d) +static int igbvf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); @@ -2957,7 +2957,7 @@ static const struct pci_device_id igbvf_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, igbvf_pci_tbl); -static SIMPLE_DEV_PM_OPS(igbvf_pm_ops, igbvf_suspend, igbvf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(igbvf_pm_ops, igbvf_suspend, igbvf_resume); /* PCI Device API Driver */ static struct pci_driver igbvf_driver = { @@ -2965,7 +2965,7 @@ static struct pci_driver igbvf_driver = { .id_table = igbvf_pci_tbl, .probe = igbvf_probe, .remove = igbvf_remove, - .driver.pm = &igbvf_pm_ops, + .driver.pm = pm_sleep_ptr(&igbvf_pm_ops), .shutdown = igbvf_shutdown, .err_handler = &igbvf_err_handler }; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 0cd923c8ac9b..998d8c345a78 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7104,8 +7104,7 @@ static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, return 0; } -#ifdef CONFIG_PM -static int __maybe_unused igc_runtime_suspend(struct device *dev) +static int igc_runtime_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 1); } @@ -7140,7 +7139,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev) netif_rx(skb); } -static int __maybe_unused igc_resume(struct device *dev) +static int igc_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7193,12 +7192,12 @@ static int __maybe_unused igc_resume(struct device *dev) return err; } -static int __maybe_unused igc_runtime_resume(struct device *dev) +static int igc_runtime_resume(struct device *dev) { return igc_resume(dev); } -static int __maybe_unused igc_suspend(struct device *dev) +static int igc_suspend(struct device *dev) { return __igc_shutdown(to_pci_dev(dev), NULL, 0); } @@ -7213,7 +7212,6 @@ static int __maybe_unused igc_runtime_idle(struct device *dev) return -EBUSY; } -#endif /* CONFIG_PM */ static void igc_shutdown(struct pci_dev *pdev) { @@ -7328,22 +7326,16 @@ static const struct pci_error_handlers igc_err_handler = { .resume = igc_io_resume, }; -#ifdef CONFIG_PM -static const struct dev_pm_ops igc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) - SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, - igc_runtime_idle) -}; -#endif +static _DEFINE_DEV_PM_OPS(igc_pm_ops, igc_suspend, igc_resume, + igc_runtime_suspend, igc_runtime_resume, + igc_runtime_idle); static struct pci_driver igc_driver = { .name = igc_driver_name, .id_table = igc_pci_tbl, .probe = igc_probe, .remove = igc_remove, -#ifdef CONFIG_PM - .driver.pm = &igc_pm_ops, -#endif + .driver.pm = pm_ptr(&igc_pm_ops), .shutdown = igc_shutdown, .err_handler = &igc_err_handler, }; diff --git 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f985252c8c8d..987dd9c7206d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6974,7 +6974,7 @@ int ixgbe_close(struct net_device *netdev) return 0; } -static int __maybe_unused ixgbe_resume(struct device *dev_d) +static int ixgbe_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); @@ -7082,7 +7082,7 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) return 0; } -static int __maybe_unused ixgbe_suspend(struct device *dev_d) +static int ixgbe_suspend(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); int retval; @@ -11588,14 +11588,14 @@ static const struct pci_error_handlers ixgbe_err_handler = { .resume = ixgbe_io_resume, }; -static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume); static struct pci_driver ixgbe_driver = { .name = ixgbe_driver_name, .id_table = ixgbe_pci_tbl, .probe = ixgbe_probe, .remove = ixgbe_remove, - .driver.pm = &ixgbe_pm_ops, + .driver.pm = pm_sleep_ptr(&ixgbe_pm_ops), .shutdown = ixgbe_shutdown, .sriov_configure = ixgbe_pci_sriov_configure, .err_handler = &ixgbe_err_handler diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 9c960017a6de..3161a13079fe 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4300,7 +4300,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static int __maybe_unused ixgbevf_suspend(struct device *dev_d) +static int ixgbevf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct ixgbevf_adapter *adapter = netdev_priv(netdev); @@ -4317,7 +4317,7 @@ static int __maybe_unused ixgbevf_suspend(struct device *dev_d) return 0; } -static int __maybe_unused ixgbevf_resume(struct device *dev_d) +static int ixgbevf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); @@ -4854,7 +4854,7 @@ static const struct pci_error_handlers ixgbevf_err_handler = { .resume = ixgbevf_io_resume, }; -static SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume); static struct pci_driver ixgbevf_driver = { .name = ixgbevf_driver_name, @@ -4863,7 +4863,7 @@ static struct pci_driver ixgbevf_driver = { .remove = ixgbevf_remove, /* Power Management Hooks */ - .driver.pm = &ixgbevf_pm_ops, + .driver.pm = pm_sleep_ptr(&ixgbevf_pm_ops), .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler -- cgit v1.2.3 From 6f31d6b643a32cc126cf86093fca1ea575948bf0 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 11 Feb 2024 09:30:58 +0200 Subject: igc: Refactor runtime power management flow Following the corresponding discussion [1] and [2] refactor the 'igc_open' method and avoid taking the rtnl_lock() during the 'igc_resume' method. The rtnl_lock is held by the upper layer and could lead to the deadlock during resuming from a runtime power management flow. Notify the stack of the actual queue counts 'netif_set_real_num_*_queues' outside the '_igc_open' wrapper. This notification doesn't have to be called on each resume. Test: 1. 
Disconnect the ethernet cable 2. Enable the runtime power management via file system: echo auto > /sys/devices/pci0000\.../power/control 3. Check the device state (lspci -s -vvv | grep -i Status) Link: https://lore.kernel.org/netdev/20231206113934.8d7819857574.I2deb5804 ef1739a2af307283d320ef7d82456494@changeid/#r [1] Link: https://lore.kernel.org/netdev/20211125074949.5f897431@kicinski-fedo ra-pc1c0hjn.dhcp.thefacebook.com/t/ [2] Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 32 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 998d8c345a78..d9bd001af7ba 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5929,15 +5929,6 @@ static int __igc_open(struct net_device *netdev, bool resuming) if (err) goto err_req_irq; - /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); - if (err) - goto err_set_queues; - clear_bit(__IGC_DOWN, &adapter->state); for (i = 0; i < adapter->num_q_vectors; i++) @@ -5958,8 +5949,6 @@ static int __igc_open(struct net_device *netdev, bool resuming) return IGC_SUCCESS; -err_set_queues: - igc_free_irq(adapter); err_req_irq: igc_release_hw_control(adapter); igc_power_down_phy_copper_base(&adapter->hw); @@ -5976,6 +5965,17 @@ err_setup_tx: int igc_open(struct net_device *netdev) { + struct igc_adapter *adapter = netdev_priv(netdev); + int err; + + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_queues(netdev, adapter->num_tx_queues, + adapter->num_rx_queues); + if (err) { + netdev_err(netdev, "error setting real queue count\n"); + return err; + } + return __igc_open(netdev, false); } @@ -7181,13 +7181,11 @@ static int igc_resume(struct device *dev) wr32(IGC_WUS, ~0); - rtnl_lock(); - if (!err && netif_running(netdev)) + if (netif_running(netdev)) { err = __igc_open(netdev, true); - - if (!err) - netif_device_attach(netdev); - rtnl_unlock(); + if (!err) + netif_device_attach(netdev); + } return err; } -- cgit v1.2.3 From ee4300b24a32ced81e72e11fdceeb8d536997a28 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 6 Mar 2024 17:30:54 +0100 Subject: i40e: avoid forward declarations in i40e_nvm.c Move code around to get rid of forward declarations. No functional changes. After a plain code juggling, checkpatch reported: total: 0 errors, 7 warnings, 12 checks, 1581 lines checked so while at it let's address old issues as well. Should we ever address the remaining unnecessary forward declarations within drivers/net/ethernet/intel/, consider this change as a starting point/reference. As reported in [0], there would be a lot more of work to do...if we care. 
[0]: https://lore.kernel.org/intel-wired-lan/Zeh8qadiTGf413YU@boxer/T/#u Signed-off-by: Maciej Fijalkowski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 1050 ++++++++++++++-------------- 1 file changed, 509 insertions(+), 541 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 605fd82f5d20..7f0936f4e05e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -734,37 +734,7 @@ int i40e_validate_nvm_checksum(struct i40e_hw *hw, return ret_code; } -static int i40e_nvmupd_state_init(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_state_reading(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_state_writing(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *errno); -static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno); -static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno); -static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno); -static inline u8 i40e_nvmupd_get_module(u32 val) +static u8 i40e_nvmupd_get_module(u32 val) { return (u8)(val & I40E_NVM_MOD_PNT_MASK); } @@ -799,146 +769,433 @@ static const char * const i40e_nvm_update_state_str[] = { }; /** - * i40e_nvmupd_command - Process an NVM update command + * i40e_nvmupd_validate_command - Validate given command * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command - * @bytes: pointer to the data buffer + * @cmd: pointer to nvm update command buffer * @perrno: pointer to return error code * - * Dispatches command depending on what update state is current + * Return one of the valid command types or I40E_NVMUPD_INVALID **/ -int i40e_nvmupd_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +static enum i40e_nvmupd_cmd +i40e_nvmupd_validate_command(struct i40e_hw *hw, struct i40e_nvm_access *cmd, + int *perrno) { enum i40e_nvmupd_cmd upd_cmd; - int status; - - /* assume success */ - *perrno = 0; + u8 module, transaction; - /* early check for status command and debug msgs */ - upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + /* anything that doesn't match a recognized case is an error */ + upd_cmd = I40E_NVMUPD_INVALID; - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", - i40e_nvm_update_state_str[upd_cmd], - hw->nvmupd_state, - hw->nvm_release_on_done, hw->nvm_wait_opcode, - cmd->command, cmd->config, cmd->offset, cmd->data_size); + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); - if (upd_cmd == I40E_NVMUPD_INVALID) { - *perrno = 
-EFAULT; + /* limits on data size */ + if (cmd->data_size < 1 || cmd->data_size > I40E_NVMUPD_MAX_DATA) { i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_validate_command returns %d errno %d\n", - upd_cmd, *perrno); + "%s data_size %d\n", __func__, cmd->data_size); + *perrno = -EFAULT; + return I40E_NVMUPD_INVALID; } - /* a status request returns immediately rather than - * going into the state machine - */ - if (upd_cmd == I40E_NVMUPD_STATUS) { - if (!cmd->data_size) { - *perrno = -EFAULT; - return -EINVAL; + switch (cmd->command) { + case I40E_NVM_READ: + switch (transaction) { + case I40E_NVM_CON: + upd_cmd = I40E_NVMUPD_READ_CON; + break; + case I40E_NVM_SNT: + upd_cmd = I40E_NVMUPD_READ_SNT; + break; + case I40E_NVM_LCB: + upd_cmd = I40E_NVMUPD_READ_LCB; + break; + case I40E_NVM_SA: + upd_cmd = I40E_NVMUPD_READ_SA; + break; + case I40E_NVM_EXEC: + if (module == 0xf) + upd_cmd = I40E_NVMUPD_STATUS; + else if (module == 0) + upd_cmd = I40E_NVMUPD_GET_AQ_RESULT; + break; + case I40E_NVM_AQE: + upd_cmd = I40E_NVMUPD_GET_AQ_EVENT; + break; } + break; - bytes[0] = hw->nvmupd_state; - - if (cmd->data_size >= 4) { - bytes[1] = 0; - *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; + case I40E_NVM_WRITE: + switch (transaction) { + case I40E_NVM_CON: + upd_cmd = I40E_NVMUPD_WRITE_CON; + break; + case I40E_NVM_SNT: + upd_cmd = I40E_NVMUPD_WRITE_SNT; + break; + case I40E_NVM_LCB: + upd_cmd = I40E_NVMUPD_WRITE_LCB; + break; + case I40E_NVM_SA: + upd_cmd = I40E_NVMUPD_WRITE_SA; + break; + case I40E_NVM_ERA: + upd_cmd = I40E_NVMUPD_WRITE_ERA; + break; + case I40E_NVM_CSUM: + upd_cmd = I40E_NVMUPD_CSUM_CON; + break; + case (I40E_NVM_CSUM | I40E_NVM_SA): + upd_cmd = I40E_NVMUPD_CSUM_SA; + break; + case (I40E_NVM_CSUM | I40E_NVM_LCB): + upd_cmd = I40E_NVMUPD_CSUM_LCB; + break; + case I40E_NVM_EXEC: + if (module == 0) + upd_cmd = I40E_NVMUPD_EXEC_AQ; + break; } + break; + } - /* Clear error status on read */ - if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + return upd_cmd; +} - return 0; - } +/** + * i40e_nvmupd_nvm_erase - Erase an NVM module + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @perrno: pointer to return error code + * + * module, offset, data_size and data are in cmd structure + **/ +static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + int status = 0; + bool last; - /* Clear status even it is not read and log */ - if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction & I40E_NVM_LCB); + + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + status = i40e_aq_erase_nvm(hw, module, cmd->offset, (u16)cmd->data_size, + last, &cmd_details); + if (status) { i40e_debug(hw, I40E_DEBUG_NVM, - "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + "%s mod 0x%x off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); } - /* Acquire lock to prevent race condition where adminq_task - * can execute after i40e_nvmupd_nvm_read/write but before state - * variables (nvm_wait_opcode, nvm_release_on_done) are updated. 
- * - * During NVMUpdate, it is observed that lock could be held for - * ~5ms for most commands. However lock is held for ~60ms for - * NVMUPD_CSUM_LCB command. - */ - mutex_lock(&hw->aq.arq_mutex); - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT: - status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); - break; - - case I40E_NVMUPD_STATE_READING: - status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno); - break; + return status; +} - case I40E_NVMUPD_STATE_WRITING: - status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno); - break; +/** + * i40e_nvmupd_nvm_write - Write NVM + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * module, offset, data_size and data are in cmd structure + **/ +static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + u8 preservation_flags; + int status = 0; + bool last; - case I40E_NVMUPD_STATE_INIT_WAIT: - case I40E_NVMUPD_STATE_WRITE_WAIT: - /* if we need to stop waiting for an event, clear - * the wait info and return before doing anything else - */ - if (cmd->offset == 0xffff) { - i40e_nvmupd_clear_wait_state(hw); - status = 0; - break; - } + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction & I40E_NVM_LCB); + preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config); - status = -EBUSY; - *perrno = -EBUSY; - break; + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; - default: - /* invalid state, should never happen */ + status = i40e_aq_update_nvm(hw, module, cmd->offset, + (u16)cmd->data_size, bytes, last, + preservation_flags, &cmd_details); + if (status) { i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: no such state %d\n", hw->nvmupd_state); - status = -EOPNOTSUPP; - *perrno = -ESRCH; - break; + "%s mod 0x%x off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); } - mutex_unlock(&hw->aq.arq_mutex); return status; } /** - * i40e_nvmupd_state_init - Handle NVM update state Init + * i40e_nvmupd_nvm_read - Read NVM * @hw: pointer to hardware structure * @cmd: pointer to nvm update command buffer * @bytes: pointer to the data buffer * @perrno: pointer to return error code * - * Process legitimate commands of the Init state and conditionally set next - * state. Reject all other commands. 
+ * cmd structure contains identifiers and data buffer **/ -static int i40e_nvmupd_state_init(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) { - enum i40e_nvmupd_cmd upd_cmd; - int status = 0; + struct i40e_asq_cmd_details cmd_details; + u8 module, transaction; + int status; + bool last; - upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + transaction = i40e_nvmupd_get_transaction(cmd->config); + module = i40e_nvmupd_get_module(cmd->config); + last = (transaction == I40E_NVM_LCB) || (transaction == I40E_NVM_SA); - switch (upd_cmd) { - case I40E_NVMUPD_READ_SA: - status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (status) { - *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); - } else { + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + status = i40e_aq_read_nvm(hw, module, cmd->offset, (u16)cmd->data_size, + bytes, last, &cmd_details); + if (status) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s mod 0x%x off 0x%x len 0x%x\n", + __func__, module, cmd->offset, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, + "%s status %d aq %d\n", + __func__, status, hw->aq.asq_last_status); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + } + + return status; +} + +/** + * i40e_nvmupd_exec_aq - Run an AQ command + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + struct i40e_asq_cmd_details cmd_details; + struct i40e_aq_desc *aq_desc; + u32 buff_size = 0; + u8 *buff = NULL; + u32 aq_desc_len; + u32 aq_data_len; + int status; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + if (cmd->offset == 0xffff) + return 0; + + memset(&cmd_details, 0, sizeof(cmd_details)); + cmd_details.wb_desc = &hw->nvm_wb_desc; + + aq_desc_len = sizeof(struct i40e_aq_desc); + memset(&hw->nvm_wb_desc, 0, aq_desc_len); + + /* get the aq descriptor */ + if (cmd->data_size < aq_desc_len) { + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: not enough aq desc bytes for exec, size %d < %d\n", + cmd->data_size, aq_desc_len); + *perrno = -EINVAL; + return -EINVAL; + } + aq_desc = (struct i40e_aq_desc *)bytes; + + /* if data buffer needed, make sure it's ready */ + aq_data_len = cmd->data_size - aq_desc_len; + buff_size = max_t(u32, aq_data_len, le16_to_cpu(aq_desc->datalen)); + if (buff_size) { + if (!hw->nvm_buff.va) { + status = i40e_allocate_virt_mem(hw, &hw->nvm_buff, + hw->aq.asq_buf_size); + if (status) + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: i40e_allocate_virt_mem for exec buff failed, %d\n", + status); + } + + if (hw->nvm_buff.va) { + buff = hw->nvm_buff.va; + memcpy(buff, &bytes[aq_desc_len], aq_data_len); + } + } + + if (cmd->offset) + memset(&hw->nvm_aq_event_desc, 0, aq_desc_len); + + /* and away we go! */ + status = i40e_asq_send_command(hw, aq_desc, buff, + buff_size, &cmd_details); + if (status) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s err %pe aq_err %s\n", + __func__, ERR_PTR(status), + i40e_aq_str(hw, hw->aq.asq_last_status)); + *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + return status; + } + + /* should we wait for a followup event? 
*/ + if (cmd->offset) { + hw->nvm_wait_opcode = cmd->offset; + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; + } + + return status; +} + +/** + * i40e_nvmupd_get_aq_result - Get the results from the previous exec_aq + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + u32 aq_total_len; + u32 aq_desc_len; + int remainder; + u8 *buff; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + + aq_desc_len = sizeof(struct i40e_aq_desc); + aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_wb_desc.datalen); + + /* check offset range */ + if (cmd->offset > aq_total_len) { + i40e_debug(hw, I40E_DEBUG_NVM, "%s: offset too big %d > %d\n", + __func__, cmd->offset, aq_total_len); + *perrno = -EINVAL; + return -EINVAL; + } + + /* check copylength range */ + if (cmd->data_size > (aq_total_len - cmd->offset)) { + int new_len = aq_total_len - cmd->offset; + + i40e_debug(hw, I40E_DEBUG_NVM, "%s: copy length %d too big, trimming to %d\n", + __func__, cmd->data_size, new_len); + cmd->data_size = new_len; + } + + remainder = cmd->data_size; + if (cmd->offset < aq_desc_len) { + u32 len = aq_desc_len - cmd->offset; + + len = min(len, cmd->data_size); + i40e_debug(hw, I40E_DEBUG_NVM, "%s: aq_desc bytes %d to %d\n", + __func__, cmd->offset, cmd->offset + len); + + buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; + memcpy(bytes, buff, len); + + bytes += len; + remainder -= len; + buff = hw->nvm_buff.va; + } else { + buff = hw->nvm_buff.va + (cmd->offset - aq_desc_len); + } + + if (remainder > 0) { + int start_byte = buff - (u8 *)hw->nvm_buff.va; + + i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n", + __func__, start_byte, start_byte + remainder); + memcpy(bytes, buff, remainder); + } + + return 0; +} + +/** + * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + u32 aq_total_len; + u32 aq_desc_len; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + + aq_desc_len = sizeof(struct i40e_aq_desc); + aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen); + + /* check copylength range */ + if (cmd->data_size > aq_total_len) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s: copy length %d too big, trimming to %d\n", + __func__, cmd->data_size, aq_total_len); + cmd->data_size = aq_total_len; + } + + memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size); + + return 0; +} + +/** + * i40e_nvmupd_state_init - Handle NVM update state Init + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * Process legitimate commands of the Init state and conditionally set next + * state. Reject all other commands. 
+ **/ +static int i40e_nvmupd_state_init(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + enum i40e_nvmupd_cmd upd_cmd; + int status = 0; + + upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); + + switch (upd_cmd) { + case I40E_NVMUPD_READ_SA: + status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); + if (status) { + *perrno = i40e_aq_rc_to_posix(status, + hw->aq.asq_last_status); + } else { status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno); i40e_release_nvm(hw); } @@ -948,7 +1205,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno); if (status) @@ -962,7 +1219,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_erase(hw, cmd, perrno); if (status) { @@ -979,7 +1236,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); if (status) { @@ -996,7 +1253,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno); if (status) { @@ -1012,7 +1269,7 @@ static int i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); if (status) { *perrno = i40e_aq_rc_to_posix(status, - hw->aq.asq_last_status); + hw->aq.asq_last_status); } else { status = i40e_update_nvm_checksum(hw); if (status) { @@ -1185,7 +1442,7 @@ retry: * so here we try to reacquire the semaphore then retry the write. * We only do one retry, then give up. 
*/ - if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) && + if (status && hw->aq.asq_last_status == I40E_AQ_RC_EBUSY && !retry_attempt) { u32 old_asq_status = hw->aq.asq_last_status; int old_status = status; @@ -1215,457 +1472,168 @@ retry: } /** - * i40e_nvmupd_clear_wait_state - clear wait state on hw - * @hw: pointer to the hardware structure + * i40e_nvmupd_command - Process an NVM update command + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * Dispatches command depending on what update state is current **/ -void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) +int i40e_nvmupd_command(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) { - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: clearing wait on opcode 0x%04x\n", - hw->nvm_wait_opcode); - - if (hw->nvm_release_on_done) { - i40e_release_nvm(hw); - hw->nvm_release_on_done = false; - } - hw->nvm_wait_opcode = 0; + enum i40e_nvmupd_cmd upd_cmd; + int status; - if (hw->aq.arq_last_status) { - hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; - return; - } + /* assume success */ + *perrno = 0; - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - break; + /* early check for status command and debug msgs */ + upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - case I40E_NVMUPD_STATE_WRITE_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; - break; + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", + i40e_nvm_update_state_str[upd_cmd], + hw->nvmupd_state, + hw->nvm_release_on_done, hw->nvm_wait_opcode, + cmd->command, cmd->config, cmd->offset, cmd->data_size); - default: - break; + if (upd_cmd == I40E_NVMUPD_INVALID) { + *perrno = -EFAULT; + i40e_debug(hw, I40E_DEBUG_NVM, + "i40e_nvmupd_validate_command returns %d errno %d\n", + upd_cmd, *perrno); } -} -/** - * i40e_nvmupd_check_wait_event - handle NVM update operation events - * @hw: pointer to the hardware structure - * @opcode: the event that just happened - * @desc: AdminQ descriptor - **/ -void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, - struct i40e_aq_desc *desc) -{ - u32 aq_desc_len = sizeof(struct i40e_aq_desc); + /* a status request returns immediately rather than + * going into the state machine + */ + if (upd_cmd == I40E_NVMUPD_STATUS) { + if (!cmd->data_size) { + *perrno = -EFAULT; + return -EINVAL; + } - if (opcode == hw->nvm_wait_opcode) { - memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len); - i40e_nvmupd_clear_wait_state(hw); - } -} + bytes[0] = hw->nvmupd_state; -/** - * i40e_nvmupd_validate_command - Validate given command - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @perrno: pointer to return error code - * - * Return one of the valid command types or I40E_NVMUPD_INVALID - **/ -static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno) -{ - enum i40e_nvmupd_cmd upd_cmd; - u8 module, transaction; + if (cmd->data_size >= 4) { + bytes[1] = 0; + *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; + } - /* anything that doesn't match a recognized case is an error */ - upd_cmd = I40E_NVMUPD_INVALID; + /* Clear error status on read */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - transaction = 
i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); + return 0; + } - /* limits on data size */ - if ((cmd->data_size < 1) || - (cmd->data_size > I40E_NVMUPD_MAX_DATA)) { + /* Clear status even it is not read and log */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_validate_command data_size %d\n", - cmd->data_size); - *perrno = -EFAULT; - return I40E_NVMUPD_INVALID; + "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } - switch (cmd->command) { - case I40E_NVM_READ: - switch (transaction) { - case I40E_NVM_CON: - upd_cmd = I40E_NVMUPD_READ_CON; - break; - case I40E_NVM_SNT: - upd_cmd = I40E_NVMUPD_READ_SNT; - break; - case I40E_NVM_LCB: - upd_cmd = I40E_NVMUPD_READ_LCB; - break; - case I40E_NVM_SA: - upd_cmd = I40E_NVMUPD_READ_SA; - break; - case I40E_NVM_EXEC: - if (module == 0xf) - upd_cmd = I40E_NVMUPD_STATUS; - else if (module == 0) - upd_cmd = I40E_NVMUPD_GET_AQ_RESULT; - break; - case I40E_NVM_AQE: - upd_cmd = I40E_NVMUPD_GET_AQ_EVENT; - break; - } + /* Acquire lock to prevent race condition where adminq_task + * can execute after i40e_nvmupd_nvm_read/write but before state + * variables (nvm_wait_opcode, nvm_release_on_done) are updated. + * + * During NVMUpdate, it is observed that lock could be held for + * ~5ms for most commands. However lock is held for ~60ms for + * NVMUPD_CSUM_LCB command. + */ + mutex_lock(&hw->aq.arq_mutex); + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT: + status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); break; - case I40E_NVM_WRITE: - switch (transaction) { - case I40E_NVM_CON: - upd_cmd = I40E_NVMUPD_WRITE_CON; - break; - case I40E_NVM_SNT: - upd_cmd = I40E_NVMUPD_WRITE_SNT; - break; - case I40E_NVM_LCB: - upd_cmd = I40E_NVMUPD_WRITE_LCB; - break; - case I40E_NVM_SA: - upd_cmd = I40E_NVMUPD_WRITE_SA; - break; - case I40E_NVM_ERA: - upd_cmd = I40E_NVMUPD_WRITE_ERA; - break; - case I40E_NVM_CSUM: - upd_cmd = I40E_NVMUPD_CSUM_CON; - break; - case (I40E_NVM_CSUM|I40E_NVM_SA): - upd_cmd = I40E_NVMUPD_CSUM_SA; - break; - case (I40E_NVM_CSUM|I40E_NVM_LCB): - upd_cmd = I40E_NVMUPD_CSUM_LCB; - break; - case I40E_NVM_EXEC: - if (module == 0) - upd_cmd = I40E_NVMUPD_EXEC_AQ; - break; - } + case I40E_NVMUPD_STATE_READING: + status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno); break; - } - - return upd_cmd; -} - -/** - * i40e_nvmupd_exec_aq - Run an AQ command - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_exec_aq(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - struct i40e_aq_desc *aq_desc; - u32 buff_size = 0; - u8 *buff = NULL; - u32 aq_desc_len; - u32 aq_data_len; - int status; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - if (cmd->offset == 0xffff) - return 0; - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; - aq_desc_len = sizeof(struct i40e_aq_desc); - memset(&hw->nvm_wb_desc, 0, aq_desc_len); - - /* get the aq descriptor */ - if (cmd->data_size < aq_desc_len) { - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: not enough aq desc bytes for exec, size %d < %d\n", - cmd->data_size, aq_desc_len); - *perrno = -EINVAL; - return -EINVAL; - } - aq_desc = (struct 
i40e_aq_desc *)bytes; - - /* if data buffer needed, make sure it's ready */ - aq_data_len = cmd->data_size - aq_desc_len; - buff_size = max_t(u32, aq_data_len, le16_to_cpu(aq_desc->datalen)); - if (buff_size) { - if (!hw->nvm_buff.va) { - status = i40e_allocate_virt_mem(hw, &hw->nvm_buff, - hw->aq.asq_buf_size); - if (status) - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: i40e_allocate_virt_mem for exec buff failed, %d\n", - status); - } + case I40E_NVMUPD_STATE_WRITING: + status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno); + break; - if (hw->nvm_buff.va) { - buff = hw->nvm_buff.va; - memcpy(buff, &bytes[aq_desc_len], aq_data_len); + case I40E_NVMUPD_STATE_INIT_WAIT: + case I40E_NVMUPD_STATE_WRITE_WAIT: + /* if we need to stop waiting for an event, clear + * the wait info and return before doing anything else + */ + if (cmd->offset == 0xffff) { + i40e_nvmupd_clear_wait_state(hw); + status = 0; + break; } - } - if (cmd->offset) - memset(&hw->nvm_aq_event_desc, 0, aq_desc_len); + status = -EBUSY; + *perrno = -EBUSY; + break; - /* and away we go! */ - status = i40e_asq_send_command(hw, aq_desc, buff, - buff_size, &cmd_details); - if (status) { + default: + /* invalid state, should never happen */ i40e_debug(hw, I40E_DEBUG_NVM, - "%s err %pe aq_err %s\n", - __func__, ERR_PTR(status), - i40e_aq_str(hw, hw->aq.asq_last_status)); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); - return status; - } - - /* should we wait for a followup event? */ - if (cmd->offset) { - hw->nvm_wait_opcode = cmd->offset; - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; + "NVMUPD: no such state %d\n", hw->nvmupd_state); + status = -EOPNOTSUPP; + *perrno = -ESRCH; + break; } + mutex_unlock(&hw->aq.arq_mutex); return status; } /** - * i40e_nvmupd_get_aq_result - Get the results from the previous exec_aq - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - u32 aq_total_len; - u32 aq_desc_len; - int remainder; - u8 *buff; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - - aq_desc_len = sizeof(struct i40e_aq_desc); - aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_wb_desc.datalen); - - /* check offset range */ - if (cmd->offset > aq_total_len) { - i40e_debug(hw, I40E_DEBUG_NVM, "%s: offset too big %d > %d\n", - __func__, cmd->offset, aq_total_len); - *perrno = -EINVAL; - return -EINVAL; - } - - /* check copylength range */ - if (cmd->data_size > (aq_total_len - cmd->offset)) { - int new_len = aq_total_len - cmd->offset; - - i40e_debug(hw, I40E_DEBUG_NVM, "%s: copy length %d too big, trimming to %d\n", - __func__, cmd->data_size, new_len); - cmd->data_size = new_len; - } - - remainder = cmd->data_size; - if (cmd->offset < aq_desc_len) { - u32 len = aq_desc_len - cmd->offset; - - len = min(len, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, "%s: aq_desc bytes %d to %d\n", - __func__, cmd->offset, cmd->offset + len); - - buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; - memcpy(bytes, buff, len); - - bytes += len; - remainder -= len; - buff = hw->nvm_buff.va; - } else { - buff = hw->nvm_buff.va + (cmd->offset - aq_desc_len); - } - - if (remainder > 0) { - int start_byte = buff - (u8 *)hw->nvm_buff.va; - - i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n", - __func__, start_byte, 
start_byte + remainder); - memcpy(bytes, buff, remainder); - } - - return 0; -} - -/** - * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer + * i40e_nvmupd_clear_wait_state - clear wait state on hw + * @hw: pointer to the hardware structure **/ -static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) { - u32 aq_total_len; - u32 aq_desc_len; - - i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); - - aq_desc_len = sizeof(struct i40e_aq_desc); - aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen); + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: clearing wait on opcode 0x%04x\n", + hw->nvm_wait_opcode); - /* check copylength range */ - if (cmd->data_size > aq_total_len) { - i40e_debug(hw, I40E_DEBUG_NVM, - "%s: copy length %d too big, trimming to %d\n", - __func__, cmd->data_size, aq_total_len); - cmd->data_size = aq_total_len; + if (hw->nvm_release_on_done) { + i40e_release_nvm(hw); + hw->nvm_release_on_done = false; } + hw->nvm_wait_opcode = 0; - memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size); - - return 0; -} - -/** - * i40e_nvmupd_nvm_read - Read NVM - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * cmd structure contains identifiers and data buffer - **/ -static int i40e_nvmupd_nvm_read(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - int status; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction == I40E_NVM_LCB) || (transaction == I40E_NVM_SA); - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; - - status = i40e_aq_read_nvm(hw, module, cmd->offset, (u16)cmd->data_size, - bytes, last, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_read mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_read status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; } - return status; -} - -/** - * i40e_nvmupd_nvm_erase - Erase an NVM module - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @perrno: pointer to return error code - * - * module, offset, data_size and data are in cmd structure - **/ -static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - int *perrno) -{ - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - int status = 0; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction & I40E_NVM_LCB); + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + break; - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; + case 
I40E_NVMUPD_STATE_WRITE_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; + break; - status = i40e_aq_erase_nvm(hw, module, cmd->offset, (u16)cmd->data_size, - last, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_erase mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_erase status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + default: + break; } - - return status; } /** - * i40e_nvmupd_nvm_write - Write NVM - * @hw: pointer to hardware structure - * @cmd: pointer to nvm update command buffer - * @bytes: pointer to the data buffer - * @perrno: pointer to return error code - * - * module, offset, data_size and data are in cmd structure + * i40e_nvmupd_check_wait_event - handle NVM update operation events + * @hw: pointer to the hardware structure + * @opcode: the event that just happened + * @desc: AdminQ descriptor **/ -static int i40e_nvmupd_nvm_write(struct i40e_hw *hw, - struct i40e_nvm_access *cmd, - u8 *bytes, int *perrno) +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, + struct i40e_aq_desc *desc) { - struct i40e_asq_cmd_details cmd_details; - u8 module, transaction; - u8 preservation_flags; - int status = 0; - bool last; - - transaction = i40e_nvmupd_get_transaction(cmd->config); - module = i40e_nvmupd_get_module(cmd->config); - last = (transaction & I40E_NVM_LCB); - preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config); - - memset(&cmd_details, 0, sizeof(cmd_details)); - cmd_details.wb_desc = &hw->nvm_wb_desc; + u32 aq_desc_len = sizeof(struct i40e_aq_desc); - status = i40e_aq_update_nvm(hw, module, cmd->offset, - (u16)cmd->data_size, bytes, last, - preservation_flags, &cmd_details); - if (status) { - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n", - module, cmd->offset, cmd->data_size); - i40e_debug(hw, I40E_DEBUG_NVM, - "i40e_nvmupd_nvm_write status %d aq %d\n", - status, hw->aq.asq_last_status); - *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + if (opcode == hw->nvm_wait_opcode) { + memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len); + i40e_nvmupd_clear_wait_state(hw); } - - return status; } -- cgit v1.2.3 From b6694abcf5dffd2dac83a7fea067d27a49df0079 Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Tue, 26 Mar 2024 11:45:14 -0700 Subject: octeontx2-af: Increase maximum BPID channels Any NIX interface type can have maximum 256 channels. So increased the backpressure ID count to 256 so that it can cover cn9k and cn10k SoCs that have different NIX interface types with varied maximum channels. Signed-off-by: Radha Mohan Chintakuntla Link: https://lore.kernel.org/r/20240326184514.1628284-1-radhac@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index eb2a20b5a0d0..3d801a1a4f70 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1213,10 +1213,8 @@ struct nix_bp_cfg_req { /* bpid_per_chan = 1 assigns separate bp id for each channel */ }; -/* PF can be mapped to either CGX or LBK interface, - * so maximum 64 channels are possible. 
- */ -#define NIX_MAX_BPID_CHAN 64 +/* Maximum channels any single NIX interface can have */ +#define NIX_MAX_BPID_CHAN 256 struct nix_bp_cfg_rsp { struct mbox_msghdr hdr; u16 chan_bpid[NIX_MAX_BPID_CHAN]; /* Channel and bpid mapping */ -- cgit v1.2.3 From 9046d581ed586f3c715357638ca12c0e84402002 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 23:38:01 +0100 Subject: enetc: avoid truncating error message As clang points out, the error message in enetc_setup_xdp_prog() still does not fit in the buffer and will be truncated: drivers/net/ethernet/freescale/enetc/enetc.c:2771:3: error: 'snprintf' will always be truncated; specified size is 80, but format string expands to at least 87 [-Werror,-Wformat-truncation] Replace it with an even shorter message that should fit. Fixes: f968c56417f0 ("net: enetc: shorten enetc_setup_xdp_prog() error message to fit NETLINK_MAX_FMTMSG_LEN") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240326223825.4084412-3-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9f07f4947b63..5c45f42232d3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2769,7 +2769,7 @@ static int enetc_setup_xdp_prog(struct net_device *ndev, struct bpf_prog *prog, if (priv->min_num_stack_tx_queues + num_xdp_tx_queues > priv->num_tx_rings) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Reserving %d XDP TXQs does not leave a minimum of %d for stack (total %d)", + "Reserving %d XDP TXQs leaves under %d for stack (total %d)", num_xdp_tx_queues, priv->min_num_stack_tx_queues, priv->num_tx_rings); -- cgit v1.2.3 From 954fd908f177604d4cce77e2a88cc50b29bad5ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 23:38:02 +0100 Subject: qed: avoid truncating work queue length clang complains that the temporary string for the name passed into alloc_workqueue() is too short for its contents: drivers/net/ethernet/qlogic/qed/qed_main.c:1218:3: error: 'snprintf' will always be truncated; specified size is 16, but format string expands to at least 18 [-Werror,-Wformat-truncation] There is no need for a temporary buffer, and the actual name of a workqueue is 32 bytes (WQ_NAME_LEN), so just use the interface as intended to avoid the truncation. 
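For reference, alloc_workqueue() is declared as a printf-like function, roughly alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...), and formats the queue name internally, so callers need no scratch buffer at all. A minimal sketch of that usage follows; the identifiers are placeholders for illustration, not the real qed ones:

| #include <linux/types.h>
| #include <linux/workqueue.h>
|
| /* Illustrative only: "bus", "slot" and "fn" are hypothetical arguments. */
| static struct workqueue_struct *example_alloc_slowpath_wq(u8 bus, u8 slot, u8 fn)
| {
| 	/* the workqueue core formats the name; no local snprintf() needed */
| 	return alloc_workqueue("slowpath-%02x:%02x.%02x", 0, 0, bus, slot, fn);
| }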
Fixes: 59ccf86fe69a ("qed: Add driver infrastucture for handling mfw requests.") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240326223825.4084412-4-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 408da4312e01..17f284e9f06d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1205,7 +1205,6 @@ out: static int qed_slowpath_wq_start(struct qed_dev *cdev) { struct qed_hwfn *hwfn; - char name[NAME_SIZE]; int i; if (IS_VF(cdev)) @@ -1214,11 +1213,11 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev) for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; - snprintf(name, NAME_SIZE, "slowpath-%02x:%02x.%02x", - cdev->pdev->bus->number, - PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); + hwfn->slowpath_wq = alloc_workqueue("slowpath-%02x:%02x.%02x", + 0, 0, cdev->pdev->bus->number, + PCI_SLOT(cdev->pdev->devfn), + hwfn->abs_pf_id); - hwfn->slowpath_wq = alloc_workqueue(name, 0, 0); if (!hwfn->slowpath_wq) { DP_NOTICE(hwfn, "Cannot create slowpath workqueue\n"); return -ENOMEM; -- cgit v1.2.3 From b324a960354b872431d25959ad384ab66a7116ec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Mar 2024 23:38:03 +0100 Subject: mlx5: avoid truncating error message clang warns that one error message is too long for its destination buffer: drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c:1876:4: error: 'snprintf' will always be truncated; specified size is 80, but format string expands to at least 94 [-Werror,-Wformat-truncation-non-kprintf] Reword it to be a bit shorter so it always fits. Fixes: 70f0302b3f20 ("net/mlx5: Bridge, implement mdb offload") Signed-off-by: Arnd Bergmann Reviewed-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20240326223825.4084412-5-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 1b9bc32efd6f..c5ea1d1d2b03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1874,7 +1874,7 @@ int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", addr, vid, vport_num); NL_SET_ERR_MSG_FMT_MOD(extack, - "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", + "Failed to lookup vlan metadata for MDB (MAC=%pM,vid=%u,vport=%u)\n", addr, vid, vport_num); return -EINVAL; } -- cgit v1.2.3 From 648bb2bf444fd557192d6647945d6d8c8f7062e1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 27 Mar 2024 18:48:07 +0100 Subject: net: microchip: encx24j600: drop driver owner assignment Core in spi_register_driver() already sets the .owner, so driver does not need to. 
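For reference, spi_register_driver() is a macro that calls __spi_register_driver(THIS_MODULE, drv), and the SPI core writes that module pointer into driver.owner itself, which is what makes the explicit assignment in individual drivers redundant. A minimal sketch of a driver relying on this, using hypothetical names rather than the encx24j600 callbacks:

| #include <linux/module.h>
| #include <linux/spi/spi.h>
|
| static int example_spi_probe(struct spi_device *spi)
| {
| 	return 0;	/* a real driver would set the device up here */
| }
|
| static void example_spi_remove(struct spi_device *spi)
| {
| }
|
| static struct spi_driver example_spi_driver = {
| 	.driver = {
| 		.name = "example-spi",
| 		/* no .owner: the core sets it from THIS_MODULE at registration */
| 	},
| 	.probe = example_spi_probe,
| 	.remove = example_spi_remove,
| };
| module_spi_driver(example_spi_driver);
|
| MODULE_LICENSE("GPL");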
Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240327174810.519676-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/encx24j600.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index d7c8aa77ec75..cdc2872ace1b 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1112,7 +1112,6 @@ MODULE_DEVICE_TABLE(spi, encx24j600_spi_id_table); static struct spi_driver encx24j600_spi_net_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .bus = &spi_bus_type, }, .probe = encx24j600_spi_probe, -- cgit v1.2.3 From 1ab6fe64d220eaeb42a1130b3c31ea24c84cf5ad Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 28 Mar 2024 10:07:24 +0800 Subject: octeontx2-pf: remove unused variables req_hdr and rsp_hdr Clang static checker(scan-buid): drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c:503:2: warning: Value stored to 'rsp_hdr' is never read [deadcode.DeadStores] Remove these unused variables to save some space. Signed-off-by: Su Hui Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240328020723.4071539-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b40bd0e46751..5d8359b54098 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -450,7 +450,6 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) struct mbox_msghdr *msg = NULL; int offset, vf_idx, id, err; struct otx2_mbox_dev *mdev; - struct mbox_hdr *req_hdr; struct otx2_mbox *mbox; struct mbox *vf_mbox; struct otx2_nic *pf; @@ -461,9 +460,8 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) mbox = &pf->mbox_pfvf[0].mbox; mdev = &mbox->dev[vf_idx]; - req_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - offset = ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); + offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (id = 0; id < vf_mbox->num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + mbox->rx_start + @@ -494,7 +492,6 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) struct otx2_nic *pf = vf_mbox->pfvf; struct otx2_mbox_dev *mdev; int offset, id, vf_idx = 0; - struct mbox_hdr *rsp_hdr; struct mbox_msghdr *msg; struct otx2_mbox *mbox; @@ -502,8 +499,7 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) mbox = &pf->mbox_pfvf[0].mbox_up; mdev = &mbox->dev[vf_idx]; - rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); - offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + offset = mbox->rx_start + ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); for (id = 0; id < vf_mbox->up_num_msgs; id++) { msg = mdev->mbase + offset; -- cgit v1.2.3 From 06c2a5cd48fe50f24f8801dd10fcd2b6fd526566 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Thu, 28 Mar 2024 16:37:13 +0530 Subject: net: axienet: Fix kernel doc warnings Add description of mdio enable, mdio disable and mdio wait functions. Add description of skb pointer in axidma_bd data structure. Remove 'phy_node' description in axienet local data structure since it is not a valid struct member. 
Correct description of struct axienet_option. Fix below kernel-doc warnings in drivers/net/ethernet/xilinx/: 1) xilinx_axienet_mdio.c:1: warning: no structured comments found 2) xilinx_axienet.h:379: warning: Function parameter or struct member 'skb' not described in 'axidma_bd' 3) xilinx_axienet.h:538: warning: Excess struct member 'phy_node' description in 'axienet_local' 4) xilinx_axienet.h:1002: warning: expecting prototype for struct axiethernet_option. Prototype was for struct axienet_option instead Signed-off-by: Suraj Gupta Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20240328110713.12885-1-suraj.gupta2@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 4 ++-- drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 807ead678551..fa5500decc96 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -359,6 +359,7 @@ * @app2: MM2S/S2MM User Application Field 2. * @app3: MM2S/S2MM User Application Field 3. * @app4: MM2S/S2MM User Application Field 4. + * @skb: Pointer to SKB transferred using DMA */ struct axidma_bd { u32 next; /* Physical address of next buffer descriptor */ @@ -399,7 +400,6 @@ struct skbuf_dma_descriptor { * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. * @dev: Pointer to device structure - * @phy_node: Pointer to device node structure * @phylink: Pointer to phylink instance * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used @@ -537,7 +537,7 @@ struct axienet_local { }; /** - * struct axiethernet_option - Used to set axi ethernet hardware options + * struct axienet_option - Used to set axi ethernet hardware options * @opt: Option to be set. * @reg: Register offset to be written for setting the option * @m_or: Mask to be ORed for setting the option in the register diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 2f07fde361aa..9ca2643c921e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -20,7 +20,14 @@ #define DEFAULT_MDIO_FREQ 2500000 /* 2.5 MHz */ #define DEFAULT_HOST_CLOCK 150000000 /* 150 MHz */ -/* Wait till MDIO interface is ready to accept a new transaction.*/ +/** + * axienet_mdio_wait_until_ready - MDIO wait function + * @lp: Pointer to axienet local data structure. + * + * Return : 0 on success, Negative value on errors + * + * Wait till MDIO interface is ready to accept a new transaction. + */ static int axienet_mdio_wait_until_ready(struct axienet_local *lp) { u32 val; @@ -30,14 +37,24 @@ static int axienet_mdio_wait_until_ready(struct axienet_local *lp) 1, 20000); } -/* Enable the MDIO MDC. Called prior to a read/write operation */ +/** + * axienet_mdio_mdc_enable - MDIO MDC enable function + * @lp: Pointer to axienet local data structure. + * + * Enable the MDIO MDC. Called prior to a read/write operation + */ static void axienet_mdio_mdc_enable(struct axienet_local *lp) { axienet_iow(lp, XAE_MDIO_MC_OFFSET, ((u32)lp->mii_clk_div | XAE_MDIO_MC_MDIOEN_MASK)); } -/* Disable the MDIO MDC. 
Called after a read/write operation*/ +/** + * axienet_mdio_mdc_disable - MDIO MDC disable function + * @lp: Pointer to axienet local data structure. + * + * Disable the MDIO MDC. Called after a read/write operation + */ static void axienet_mdio_mdc_disable(struct axienet_local *lp) { u32 mc_reg; -- cgit v1.2.3 From a5535e5336943b33689f558199366102387b7bbf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Mar 2024 15:30:46 +0100 Subject: mlx5: stop warning for 64KB pages When building with 64KB pages, clang points out that xsk->chunk_size can never be PAGE_SIZE: drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c:19:22: error: result of comparison of constant 65536 with expression of type 'u16' (aka 'unsigned short') is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (xsk->chunk_size > PAGE_SIZE || ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~ In older versions of this code, using PAGE_SIZE was the only possibility, so this would have never worked on 64KB page kernels, but the patch apparently did not address this case completely. As Maxim Mikityanskiy suggested, 64KB chunks are really not all that useful, so just shut up the warning by adding a cast. Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page") Link: https://lore.kernel.org/netdev/20211013150232.2942146-1-arnd@kernel.org/ Link: https://lore.kernel.org/lkml/a7b27541-0ebb-4f2d-bd06-270a4d404613@app.fastmail.com/ Signed-off-by: Arnd Bergmann Acked-by: Maxim Mikityanskiy Reviewed-by: Justin Stitt Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20240328143051.1069575-9-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 06592b9f0424..9240cfe25d10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -28,8 +28,10 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev) { - /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ - if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) { + /* AF_XDP doesn't support frames larger than PAGE_SIZE, + * and xsk->chunk_size is limited to 65535 bytes. + */ + if ((size_t)xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) { mlx5_core_err(mdev, "XSK chunk size %u out of bounds [%u, %lu]\n", xsk->chunk_size, MLX5E_MIN_XSK_CHUNK_SIZE, PAGE_SIZE); return false; -- cgit v1.2.3 From e8058a49e67fe7bc7e4a0308851a3ca3a6d2e45d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 20:31:45 +0100 Subject: netlink: introduce type-checking attribute iteration There are, especially with multi-attr arrays, many cases of needing to iterate all attributes of a specific type in a netlink message or a nested attribute. Add specific macros to support that case. Also convert many instances using this spatch: @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... 
-} } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... -} } @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } Although I had to undo one bad change this made, and I also adjusted some other code for whitespace and to use direct variable initialization now. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20240328203144.b5a6c895fb80.I1869b44767379f204998ff44dd239803f39c23e0@changeid Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +--- drivers/net/ethernet/emulex/benet/be_main.c | 5 +--- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++----- drivers/net/ethernet/intel/ice/ice_main.c | 7 ++---- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 +++------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +--- .../net/ethernet/netronome/nfp/nfp_net_common.c | 5 +--- include/net/netlink.h | 27 ++++++++++++++++++++++ net/8021q/vlan_netlink.c | 10 ++++---- net/core/bpf_sk_storage.c | 23 ++++++++---------- net/core/rtnetlink.c | 15 ++++++------ net/devlink/dev.c | 12 ++++------ net/sched/sch_mqprio.c | 6 ++--- net/sched/sch_taprio.c | 5 +--- 14 files changed, 65 insertions(+), 79 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 388e80bf91f5..b4db4b1aaffb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14581,12 +14581,9 @@ static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { u16 mode; - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode == bp->br_mode) break; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ad862ed7888a..a8596ebcdfd6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4982,10 +4982,7 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { mode = nla_get_u16(attr); if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f86578857e8a..e427e65af205 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13108,13 +13108,9 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, if 
(!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - - mode = nla_get_u16(attr); if ((mode != BRIDGE_MODE_VEPA) && (mode != BRIDGE_MODE_VEB)) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f2b7d6ca8805..618570f23580 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7993,12 +7993,9 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) return -EINVAL; /* Continue if bridge mode is not being flipped */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f985252c8c8d..ed05af665466 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10061,15 +10061,10 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - int status; - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); + int status = ixgbe_configure_bridge_mode(adapter, mode); - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - - mode = nla_get_u16(attr); - status = ixgbe_configure_bridge_mode(adapter, mode); if (status) return status; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 91848eae4565..81e1c1e401f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4950,10 +4950,7 @@ static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { mode = nla_get_u16(attr); if (mode > BRIDGE_MODE_VEPA) return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f28e769e6fda..997cc4fcffdb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2289,10 +2289,7 @@ static int nfp_net_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, if (!br_spec) return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { new_ctrl = nn->dp.ctrl; mode = nla_get_u16(attr); if (mode == BRIDGE_MODE_VEPA) diff --git a/include/net/netlink.h b/include/net/netlink.h index c19ff921b661..1d2bbcc50212 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -157,7 +157,11 @@ * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes + * nla_for_each_attr_type() loop over all attributes with the + * given type * nla_for_each_nested() loop over the 
nested attributes + * nla_for_each_nested_type() loop over the nested attributes with + * the given type *========================================================================= */ @@ -2070,6 +2074,18 @@ static inline int nla_total_size_64bit(int payload) nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_for_each_attr_type - iterate over a stream of attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @head: head of attribute stream + * @len: length of attribute stream + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr_type(pos, type, head, len, rem) \ + nla_for_each_attr(pos, head, len, rem) \ + if (nla_type(pos) == type) + /** * nla_for_each_nested - iterate over nested attributes * @pos: loop counter, set to current attribute @@ -2079,6 +2095,17 @@ static inline int nla_total_size_64bit(int payload) #define nla_for_each_nested(pos, nla, rem) \ nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) +/** + * nla_for_each_nested_type - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @type: required attribute type for @pos + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested_type(pos, type, nla, rem) \ + nla_for_each_nested(pos, nla, rem) \ + if (nla_type(pos) == type) + /** * nla_is_last - Test if attribute is last in stream * @nla: attribute to test diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a3b68243fd4b..cf5219df7903 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -117,17 +117,15 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { - nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { - if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) - continue; + nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, + data[IFLA_VLAN_INGRESS_QOS], rem) { m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { - nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { - if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) - continue; + nla_for_each_nested_type(attr, IFLA_VLAN_QOS_MAPPING, + data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 6c4d90b24d46..bc01b3aa6b0f 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -496,27 +496,22 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) if (!bpf_capable()) return ERR_PTR(-EPERM); - nla_for_each_nested(nla, nla_stgs, rem) { - if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) { - if (nla_len(nla) != sizeof(u32)) - return ERR_PTR(-EINVAL); - nr_maps++; - } + nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + nla_stgs, rem) { + if (nla_len(nla) != sizeof(u32)) + return ERR_PTR(-EINVAL); + nr_maps++; } diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); if (!diag) return ERR_PTR(-ENOMEM); - nla_for_each_nested(nla, nla_stgs, rem) { - struct bpf_map *map; - int map_fd; - - if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) - continue; + nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + nla_stgs, rem) { + int map_fd = nla_get_u32(nla); + struct bpf_map *map = bpf_map_get(map_fd); - map_fd = nla_get_u32(nla); - map = 
bpf_map_get(map_fd); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_free; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a3d7847ce69d..283e42f48af6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5245,15 +5245,14 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { - nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { - if (nla_len(attr) < sizeof(flags)) - return -EINVAL; + nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec, + rem) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; - have_flags = true; - flags = nla_get_u16(attr); - break; - } + have_flags = true; + flags = nla_get_u16(attr); + break; } } diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 19dbf540748a..c609deb42e88 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -1202,17 +1202,13 @@ static void __devlink_compat_running_version(struct devlink *devlink, if (err) goto free_msg; - nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) { + nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING, + (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; - if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING) - continue; - - nla_for_each_nested(kv, nlattr, rem_kv) { - if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE) - continue; - + nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE, + nlattr, rem_kv) { strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 225353fbb3f1..51d4013b6121 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -215,10 +215,8 @@ static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt, for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) fp[tc] = priv->fp[tc]; - nla_for_each_attr(n, nlattr_opt, nlattr_opt_len, rem) { - if (nla_type(n) != TCA_MQPRIO_TC_ENTRY) - continue; - + nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt, + nlattr_opt_len, rem) { err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack); if (err) goto out; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a0d54b422186..1ab17e8a7260 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1752,10 +1752,7 @@ static int taprio_parse_tc_entries(struct Qdisc *sch, fp[tc] = q->fp[tc]; } - nla_for_each_nested(n, opt, rem) { - if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) - continue; - + nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) { err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs, extack); if (err) -- cgit v1.2.3 From 117aef12a7b1b797bce9f66b156c65eab850b5b5 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:52 +0100 Subject: ip_tunnel: use a separate struct to store tunnel params in the kernel Unlike IPv6 tunnels which use purely-kernel __ip6_tnl_parm structure to store params inside the kernel, IPv4 tunnel code uses the same ip_tunnel_parm which is being used to talk with the userspace. This makes it difficult to alter or add any fields or use a different format for whatever data. Define struct ip_tunnel_parm_kern, a 1:1 copy of ip_tunnel_parm for now, and use it throughout the code. 
Define the pieces, where the copy user <-> kernel happens, as standalone functions, and copy the data there field-by-field, so that the kernel-side structure could be easily modified later on and the users wouldn't have to care about this. Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 30 ++++++----- .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum_span.c | 4 +- include/linux/netdevice.h | 7 +-- include/net/ip_tunnels.h | 25 +++++++-- net/ipv4/ip_gre.c | 17 +++--- net/ipv4/ip_tunnel.c | 63 +++++++++++++++++----- net/ipv4/ip_tunnel_core.c | 2 +- net/ipv4/ip_vti.c | 12 ++--- net/ipv4/ipip.c | 12 ++--- net/ipv4/ipmr.c | 2 +- net/ipv6/addrconf.c | 3 +- net/ipv6/sit.c | 33 ++++++------ 13 files changed, 137 insertions(+), 75 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 3340b4a694c3..d67df358a52f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -8,7 +8,7 @@ #include "spectrum_ipip.h" #include "reg.h" -struct ip_tunnel_parm +struct ip_tunnel_parm_kern mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) { struct ip_tunnel *tun = netdev_priv(ol_dev); @@ -24,7 +24,8 @@ mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev) return tun->parms; } -static bool mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm *parms) +static bool +mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm_kern *parms) { return !!(parms->i_flags & TUNNEL_KEY); } @@ -34,7 +35,8 @@ static bool mlxsw_sp_ipip_parms6_has_ikey(const struct __ip6_tnl_parm *parms) return !!(parms->i_flags & TUNNEL_KEY); } -static bool mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm *parms) +static bool +mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm_kern *parms) { return !!(parms->o_flags & TUNNEL_KEY); } @@ -44,7 +46,7 @@ static bool mlxsw_sp_ipip_parms6_has_okey(const struct __ip6_tnl_parm *parms) return !!(parms->o_flags & TUNNEL_KEY); } -static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm *parms) +static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm_kern *parms) { return mlxsw_sp_ipip_parms4_has_ikey(parms) ? be32_to_cpu(parms->i_key) : 0; @@ -56,7 +58,7 @@ static u32 mlxsw_sp_ipip_parms6_ikey(const struct __ip6_tnl_parm *parms) be32_to_cpu(parms->i_key) : 0; } -static u32 mlxsw_sp_ipip_parms4_okey(const struct ip_tunnel_parm *parms) +static u32 mlxsw_sp_ipip_parms4_okey(const struct ip_tunnel_parm_kern *parms) { return mlxsw_sp_ipip_parms4_has_okey(parms) ? 
be32_to_cpu(parms->o_key) : 0; @@ -69,7 +71,7 @@ static u32 mlxsw_sp_ipip_parms6_okey(const struct __ip6_tnl_parm *parms) } static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms4_saddr(const struct ip_tunnel_parm *parms) +mlxsw_sp_ipip_parms4_saddr(const struct ip_tunnel_parm_kern *parms) { return (union mlxsw_sp_l3addr) { .addr4 = parms->iph.saddr }; } @@ -81,7 +83,7 @@ mlxsw_sp_ipip_parms6_saddr(const struct __ip6_tnl_parm *parms) } static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms4_daddr(const struct ip_tunnel_parm *parms) +mlxsw_sp_ipip_parms4_daddr(const struct ip_tunnel_parm_kern *parms) { return (union mlxsw_sp_l3addr) { .addr4 = parms->iph.daddr }; } @@ -96,7 +98,7 @@ union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - struct ip_tunnel_parm parms4; + struct ip_tunnel_parm_kern parms4; struct __ip6_tnl_parm parms6; switch (proto) { @@ -115,7 +117,9 @@ mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) { - struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + struct ip_tunnel_parm_kern parms4; + + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); return mlxsw_sp_ipip_parms4_daddr(&parms4).addr4; } @@ -124,7 +128,7 @@ static union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - struct ip_tunnel_parm parms4; + struct ip_tunnel_parm_kern parms4; struct __ip6_tnl_parm parms6; switch (proto) { @@ -150,7 +154,7 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) static struct mlxsw_sp_ipip_parms mlxsw_sp_ipip_netdev_parms_init_gre4(const struct net_device *ol_dev) { - struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); + struct ip_tunnel_parm_kern parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); return (struct mlxsw_sp_ipip_parms) { .proto = MLXSW_SP_L3_PROTO_IPV4, @@ -187,8 +191,8 @@ mlxsw_sp_ipip_decap_config_gre4(struct mlxsw_sp *mlxsw_sp, { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); + struct ip_tunnel_parm_kern parms; char rtdp_pl[MLXSW_REG_RTDP_LEN]; - struct ip_tunnel_parm parms; unsigned int type_check; bool has_ikey; u32 daddr4; @@ -252,7 +256,7 @@ static struct mlxsw_sp_rif_ipip_lb_config mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev) { - struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); + struct ip_tunnel_parm_kern parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(&parms) ? 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index a35f009da561..a66173779641 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -9,7 +9,7 @@ #include #include -struct ip_tunnel_parm +struct ip_tunnel_parm_kern mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev); struct __ip6_tnl_parm mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index af50ff9e5f26..f0cbc6b9b041 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -413,8 +413,8 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, __be32 *saddrp, __be32 *daddrp) { struct ip_tunnel *tun = netdev_priv(to_dev); + struct ip_tunnel_parm_kern parms; struct net_device *dev = NULL; - struct ip_tunnel_parm parms; struct rtable *rt = NULL; struct flowi4 fl4; @@ -451,7 +451,7 @@ mlxsw_sp_span_entry_gretap4_parms(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev, struct mlxsw_sp_span_parms *sparmsp) { - struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev); + struct ip_tunnel_parm_kern tparm = mlxsw_sp_ipip_netdev_parms4(to_dev); union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr }; union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr }; bool inherit_tos = tparm.iph.tos & 0x1; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e41d30ebaca6..55c7cf9404a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -59,7 +59,7 @@ struct ethtool_ops; struct kernel_hwtstamp_config; struct phy_device; struct dsa_port; -struct ip_tunnel_parm; +struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; struct netdev_name_node; @@ -1327,7 +1327,7 @@ struct netdev_net_notifier { * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. - * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, + * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. 
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); @@ -1583,7 +1583,8 @@ struct net_device_ops { int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); int (*ndo_tunnel_ctl)(struct net_device *dev, - struct ip_tunnel_parm *p, int cmd); + struct ip_tunnel_parm_kern *p, + int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 5cd64bb2104d..20f0319ab149 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -110,6 +110,17 @@ struct ip_tunnel_prl_entry { struct metadata_dst; +/* Kernel-side copy of ip_tunnel_parm */ +struct ip_tunnel_parm_kern { + char name[IFNAMSIZ]; + int link; + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; + struct iphdr iph; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -136,7 +147,7 @@ struct ip_tunnel { struct dst_cache dst_cache; - struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ @@ -291,7 +302,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd); +bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, + const void __user *data); +bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); @@ -307,16 +322,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); void ip_tunnel_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms); + struct ip_tunnel_parm_kern *parms); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7b16c211b904..8853c065a985 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -785,7 +785,8 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) } } -static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, +static int ipgre_tunnel_ctl(struct net_device *dev, + struct ip_tunnel_parm_kern *p, int cmd) { int err; @@ -1131,7 +1132,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], static int ipgre_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], - struct ip_tunnel_parm *parms, + struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { struct ip_tunnel *t = 
netdev_priv(dev); @@ -1198,7 +1199,7 @@ static int ipgre_netlink_parms(struct net_device *dev, static int erspan_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], - struct ip_tunnel_parm *parms, + struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { struct ip_tunnel *t = netdev_priv(dev); @@ -1357,7 +1358,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip_tunnel_parm p; + struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1375,7 +1376,7 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip_tunnel_parm p; + struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1394,8 +1395,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm_kern p; __u32 fwmark = t->fwmark; - struct ip_tunnel_parm p; int err; err = ipgre_newlink_encap_setup(dev, data); @@ -1423,8 +1424,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm_kern p; __u32 fwmark = t->fwmark; - struct ip_tunnel_parm p; int err; err = ipgre_newlink_encap_setup(dev, data); @@ -1496,7 +1497,7 @@ static size_t ipgre_get_size(const struct net_device *dev) static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm *p = &t->parms; + struct ip_tunnel_parm_kern *p = &t->parms; __be16 o_flags = p->o_flags; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 1b8d8ff9a237..d10f41f81463 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -56,7 +56,7 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) IP_TNL_HASH_BITS); } -static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, +static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, __be16 flags, __be32 key) { if (p->i_flags & TUNNEL_KEY) { @@ -171,7 +171,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, EXPORT_SYMBOL_GPL(ip_tunnel_lookup); static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm_kern *parms) { unsigned int h; __be32 remote; @@ -206,7 +206,7 @@ static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) } static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, - struct ip_tunnel_parm *parms, + struct ip_tunnel_parm_kern *parms, int type) { __be32 remote = parms->iph.daddr; @@ -230,7 +230,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, static struct net_device *__ip_tunnel_create(struct net *net, const struct rtnl_link_ops *ops, - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm_kern *parms) { int err; struct ip_tunnel *tunnel; @@ -326,7 +326,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) static struct ip_tunnel *ip_tunnel_create(struct net *net, struct ip_tunnel_net *itn, - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm_kern *parms) { struct ip_tunnel *nt; struct net_device *dev; @@ -871,7 +871,7 @@ EXPORT_SYMBOL_GPL(ip_tunnel_xmit); static void ip_tunnel_update(struct ip_tunnel_net *itn, struct ip_tunnel *t, struct net_device *dev, - struct 
ip_tunnel_parm *p, + struct ip_tunnel_parm_kern *p, bool set_mtu, __u32 fwmark) { @@ -903,7 +903,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, netdev_state_change(dev); } -int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd) { int err = 0; struct ip_tunnel *t = netdev_priv(dev); @@ -1005,16 +1006,52 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ctl); +bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, + const void __user *data) +{ + struct ip_tunnel_parm p; + + if (copy_from_user(&p, data, sizeof(p))) + return false; + + strscpy(kp->name, p.name); + kp->link = p.link; + kp->i_flags = p.i_flags; + kp->o_flags = p.o_flags; + kp->i_key = p.i_key; + kp->o_key = p.o_key; + memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph))); + + return true; +} +EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user); + +bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp) +{ + struct ip_tunnel_parm p; + + strscpy(p.name, kp->name); + p.link = kp->link; + p.i_flags = kp->i_flags; + p.o_flags = kp->o_flags; + p.i_key = kp->i_key; + p.o_key = kp->o_key; + memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph))); + + return !copy_to_user(data, &p, sizeof(p)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user); + int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { - struct ip_tunnel_parm p; + struct ip_tunnel_parm_kern p; int err; - if (copy_from_user(&p, data, sizeof(p))) + if (!ip_tunnel_parm_from_user(&p, data)) return -EFAULT; err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); - if (!err && copy_to_user(data, &p, sizeof(p))) + if (!err && !ip_tunnel_parm_to_user(data, &p)) return -EFAULT; return err; } @@ -1093,7 +1130,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); - struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms; unsigned int i; itn->rtnl_link_ops = ops; @@ -1171,7 +1208,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark) + struct ip_tunnel_parm_kern *p, __u32 fwmark) { struct ip_tunnel *nt; struct net *net = dev_net(dev); @@ -1225,7 +1262,7 @@ err_register_netdevice: EXPORT_SYMBOL_GPL(ip_tunnel_newlink); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark) + struct ip_tunnel_parm_kern *p, __u32 fwmark) { struct ip_tunnel *t; struct ip_tunnel *tunnel = netdev_priv(dev); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 80ccd6661aa3..98f136b07191 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -1116,7 +1116,7 @@ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); void ip_tunnel_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm_kern *parms) { if (data[IFLA_IPTUN_LINK]) parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index ee587adb169f..7e595f619615 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -167,7 +167,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct ip_tunnel *tunnel = 
netdev_priv(dev); - struct ip_tunnel_parm *parms = &tunnel->parms; + struct ip_tunnel_parm_kern *parms = &tunnel->parms; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int pkt_len = skb->len; @@ -373,7 +373,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) } static int -vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { int err = 0; @@ -531,7 +531,7 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], } static void vti_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms, + struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -566,7 +566,7 @@ static int vti_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms; __u32 fwmark = 0; vti_netlink_parms(data, &parms, &fwmark); @@ -578,8 +578,8 @@ static int vti_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm_kern p; __u32 fwmark = t->fwmark; - struct ip_tunnel_parm p; vti_netlink_parms(data, &p, &fwmark); return ip_tunnel_changelink(dev, tb, &p, fwmark); @@ -606,7 +606,7 @@ static size_t vti_get_size(const struct net_device *dev) static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm *p = &t->parms; + struct ip_tunnel_parm_kern *p = &t->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) || nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) || diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f2696eaadbe6..db960cd08a3b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -330,7 +330,7 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto) } static int -ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p->iph.version != 4 || @@ -405,8 +405,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms, bool *collect_md, - __u32 *fwmark) + struct ip_tunnel_parm_kern *parms, + bool *collect_md, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -432,8 +432,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + struct ip_tunnel_parm_kern p; __u32 fwmark = 0; if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { @@ -452,8 +452,8 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + struct ip_tunnel_parm_kern p; bool collect_md; __u32 fwmark = t->fwmark; @@ -510,7 +510,7 @@ static size_t ipip_get_size(const struct net_device *dev) static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct ip_tunnel_parm *parm = &tunnel->parms; + struct ip_tunnel_parm_kern *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in_addr(skb, 
IFLA_IPTUN_LOCAL, parm->iph.saddr) || diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fd5c01c8489f..6c750bd13dd8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -441,7 +441,7 @@ static bool ipmr_init_vif_indev(const struct net_device *dev) static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *tunnel_dev, *new_dev; - struct ip_tunnel_parm p = { }; + struct ip_tunnel_parm_kern p = { }; int err; tunnel_dev = __dev_get_by_name(net, "tunl0"); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 92db9b474f2b..82ad6b8fa007 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -63,6 +63,7 @@ #include #include +#include #include #include #include @@ -2917,7 +2918,7 @@ put: static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, struct in6_ifreq *ireq) { - struct ip_tunnel_parm p = { }; + struct ip_tunnel_parm_kern p = { }; int err; if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 655c9b1a19b8..894e6a4c4be3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -132,8 +132,8 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, return NULL; } -static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn, - struct ip_tunnel_parm *parms) +static struct ip_tunnel __rcu ** +__ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; @@ -226,7 +226,8 @@ out: } static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) + struct ip_tunnel_parm_kern *parms, + int create) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; @@ -1135,7 +1136,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) dev->needed_headroom = t_hlen + hlen; } -static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, +static void ipip6_tunnel_update(struct ip_tunnel *t, + struct ip_tunnel_parm_kern *p, __u32 fwmark) { struct net *net = t->net; @@ -1196,11 +1198,11 @@ static int ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm_kern p; struct ip_tunnel_6rd ip6rd; - struct ip_tunnel_parm p; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { - if (copy_from_user(&p, data, sizeof(p))) + if (!ip_tunnel_parm_from_user(&p, data)) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } @@ -1251,7 +1253,7 @@ static bool ipip6_valid_ip_proto(u8 ipproto) } static int -__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) +__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p) { if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1268,7 +1270,7 @@ __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) } static int -ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) +ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); @@ -1281,7 +1283,7 @@ ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) } static int -ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) +ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; @@ -1297,7 +1299,7 @@ ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) } static int -ipip6_tunnel_change(struct net_device *dev, struct 
ip_tunnel_parm *p) +ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); int err; @@ -1328,7 +1330,7 @@ ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p) } static int -ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) +ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p) { struct ip_tunnel *t = netdev_priv(dev); @@ -1348,7 +1350,8 @@ ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) } static int -ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd) { switch (cmd) { case SIOCGETTUNNEL: @@ -1490,7 +1493,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[], } static void ipip6_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms, + struct ip_tunnel_parm_kern *parms, __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -1599,8 +1602,8 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + struct ip_tunnel_parm_kern p; struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD @@ -1687,7 +1690,7 @@ static size_t ipip6_get_size(const struct net_device *dev) static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct ip_tunnel_parm *parm = &tunnel->parms; + struct ip_tunnel_parm_kern *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || -- cgit v1.2.3 From 5832c4a77d6931cebf9ba737129ae8f14b66ee1d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:53 +0100 Subject: ip_tunnel: convert __be16 tunnel flags to bitmaps Historically, tunnel flags like TUNNEL_CSUM or TUNNEL_ERSPAN_OPT have been defined as __be16. Now all of those 16 bits are occupied and there's no more free space for new flags. It can't be simply switched to a bigger container with no adjustments to the values, since it's an explicit Endian storage, and on LE systems (__be16)0x0001 equals to (__be64)0x0001000000000000. We could probably define new 64-bit flags depending on the Endianness, i.e. (__be64)0x0001 on BE and (__be64)0x00010000... on LE, but that would introduce an Endianness dependency and spawn a ton of Sparse warnings. To mitigate them, all of those places which were adjusted with this change would be touched anyway, so why not define stuff properly if there's no choice. Define IP_TUNNEL_*_BIT counterparts as a bit number instead of the value already coded and a fistful of <16 <-> bitmap> converters and helpers. The two flags which have a different bit position are SIT_ISATAP_BIT and VTI_ISVTI_BIT, as they were defined not as __cpu_to_be16(), but as (__force __be16), i.e. had different positions on LE and BE. Now they both have strongly defined places. Change all __be16 fields which were used to store those flags, to IP_TUNNEL_DECLARE_FLAGS() -> DECLARE_BITMAP(__IP_TUNNEL_FLAG_NUM) -> unsigned long[1] for now, and replace all TUNNEL_* occurrences to their bitmap counterparts. Use the converters in the places which talk to the userspace, hardware (NFP) or other hosts (GRE header). The rest must explicitly use the new flags only. 
This must be done at once, otherwise there will be too many conversions throughout the code in the intermediate commits. Finally, disable the old __be16 flags for use in the kernel code (except for the two 'irregular' flags mentioned above), to prevent any accidental (mis)use of them. For the userspace, nothing is changed, only additions were made. Most noticeable bloat-o-meter difference (.text): vmlinux: 307/-1 (306) gre.ko: 62/0 (62) ip_gre.ko: 941/-217 (724) [*] ip_tunnel.ko: 390/-900 (-510) [**] ip_vti.ko: 138/0 (138) ip6_gre.ko: 534/-18 (516) [*] ip6_tunnel.ko: 118/-10 (108) [*] gre_flags_to_tnl_flags() grew, but still is inlined [**] ip_tunnel_find() got uninlined, hence such decrease The average code size increase in non-extreme case is 100-200 bytes per module, mostly due to sizeof(long) > sizeof(__be16), as %__IP_TUNNEL_FLAG_NUM is less than %BITS_PER_LONG and the compilers are able to expand the majority of bitmap_*() calls here into direct operations on scalars. Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 19 ++- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 2 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 6 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 12 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_gre.c | 8 +- .../ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 ++- .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 26 +++-- .../net/ethernet/mellanox/mlxsw/spectrum_span.c | 6 +- drivers/net/ethernet/netronome/nfp/flower/action.c | 27 ++++- drivers/net/geneve.c | 44 ++++--- drivers/net/vxlan/vxlan_core.c | 14 ++- include/net/dst_metadata.h | 10 +- include/net/flow_dissector.h | 2 +- include/net/gre.h | 70 ++++++------ include/net/ip6_tunnel.h | 4 +- include/net/ip_tunnels.h | 119 ++++++++++++++++--- include/net/udp_tunnel.h | 4 +- include/uapi/linux/if_tunnel.h | 33 ++++++ net/bridge/br_vlan_tunnel.c | 9 +- net/core/filter.c | 26 +++-- net/core/flow_dissector.c | 20 +++- net/ipv4/fou_bpf.c | 2 +- net/ipv4/gre_demux.c | 2 +- net/ipv4/ip_gre.c | 127 +++++++++++++-------- net/ipv4/ip_tunnel.c | 54 +++++---- net/ipv4/ip_tunnel_core.c | 80 ++++++++----- net/ipv4/ip_vti.c | 29 +++-- net/ipv4/ipip.c | 21 +++- net/ipv4/udp_tunnel_core.c | 5 +- net/ipv6/ip6_gre.c | 85 ++++++++------ net/ipv6/ip6_tunnel.c | 14 +-- net/ipv6/sit.c | 5 +- net/netfilter/ipvs/ip_vs_core.c | 6 +- net/netfilter/ipvs/ip_vs_xmit.c | 20 ++-- net/netfilter/nft_tunnel.c | 44 ++++--- net/openvswitch/flow_netlink.c | 61 ++++++---- net/psample/psample.c | 26 +++-- net/sched/act_tunnel_key.c | 36 +++--- net/sched/cls_flower.c | 27 ++--- 40 files changed, 715 insertions(+), 415 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 339db6e4a1d5..d5c56ca91b77 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -61,6 +61,7 @@ struct bareudp_dev { static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; + IP_TUNNEL_DECLARE_FLAGS(key) = { }; struct bareudp_dev *bareudp; unsigned short family; unsigned int len; @@ -137,7 +138,10 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) bareudp->dev->stats.rx_dropped++; goto drop; } - tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); + + __set_bit(IP_TUNNEL_KEY_BIT, key); + + tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0); if (!tun_dst) { bareudp->dev->stats.rx_dropped++; goto 
drop; @@ -285,10 +289,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; __be16 sport, df; @@ -316,7 +320,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? + htons(IP_DF) : 0; skb_scrub_packet(skb, xnet); err = -ENOSPC; @@ -338,7 +343,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, bareudp->port, !net_eq(bareudp->net, dev_net(bareudp->dev)), - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; free_dst: @@ -350,10 +356,10 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) { + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct socket *sock = rcu_dereference(bareudp->sock); - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr, daddr; @@ -402,7 +408,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, &saddr, &daddr, prio, ttl, info->key.label, sport, bareudp->port, - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; free_dst: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 92065568bb19..6873c1201803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -117,7 +117,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b, - __be16 tun_flags); + u32 tun_type); #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index f1d1e1542e81..878cbdbf5ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -587,7 +587,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b, - __be16 tun_flags) + u32 tun_type) { struct ip_tunnel_info *a_info; struct ip_tunnel_info *b_info; @@ -596,8 +596,8 @@ bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) return false; - a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags); - b_has_opts = !!(b->ip_tun_key->tun_flags & 
tun_flags); + a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags); + b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags); /* keys are equal when both don't have any options attached */ if (!a_has_opts && !b_has_opts) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 2bcd10b6d653..bf969212cc77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -106,12 +106,13 @@ static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], memset(geneveh, 0, sizeof(*geneveh)); geneveh->ver = MLX5E_GENEVE_VER; geneveh->opt_len = tun_info->options_len / 4; - geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); - geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, tun_info->key.tun_flags); + geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, + tun_info->key.tun_flags); mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); geneveh->proto_type = htons(ETH_P_TEB); - if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) { if (!geneveh->opt_len) return -EOPNOTSUPP; ip_tunnel_info_opts_get(geneveh->options, tun_info); @@ -188,7 +189,7 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, /* make sure that we're talking about GENEVE options */ - if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + if (enc_opts.key->dst_opt_type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options: option type is not GENEVE"); netdev_warn(priv->netdev, @@ -337,7 +338,8 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT); + return mlx5e_tc_tun_encap_info_equal_options(a, b, + IP_TUNNEL_GENEVE_OPT_BIT); } struct mlx5e_tc_tunnel geneve_tunnel = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c index ada14f0574dc..579eda89fc76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -31,12 +31,16 @@ static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + IP_TUNNEL_DECLARE_FLAGS(unsupp) = { }; int hdr_len; *ip_proto = IPPROTO_GRE; /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + __set_bit(IP_TUNNEL_CSUM_BIT, unsupp); + __set_bit(IP_TUNNEL_SEQ_BIT, unsupp); + + if (ip_tunnel_flags_intersect(tun_key->tun_flags, unsupp)) return -EOPNOTSUPP; greh->protocol = htons(ETH_P_TEB); @@ -44,7 +48,7 @@ static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], /* GRE key */ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); *ptr = tun_id; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index a184d739d5f8..e4e487c8431b 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -90,7 +90,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], const struct vxlan_metadata *md; struct vxlanhdr *vxh; - if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) && + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags) && e->tun_info->options_len != sizeof(*md)) return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); @@ -99,7 +99,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], udp->dest = tun_key->tp_dst; vxh->vx_flags = VXLAN_HF_VNI; vxh->vx_vni = vxlan_vni_field(tun_id); - if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags)) { md = ip_tunnel_info_opts(e->tun_info); vxlan_build_gbp_hdr(vxh, md); } @@ -125,7 +125,7 @@ static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) { + if (enc_opts.key->dst_opt_type != IP_TUNNEL_VXLAN_OPT_BIT) { NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP"); return -EOPNOTSUPP; } @@ -208,7 +208,8 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a, struct mlx5e_encap_key *b) { - return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT); + return mlx5e_tc_tun_encap_info_equal_options(a, b, + IP_TUNNEL_VXLAN_OPT_BIT); } static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 31ed26cac9bf..f2f19fa37d91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5464,6 +5464,7 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct tunnel_match_enc_opts enc_opts = {}; struct mlx5_rep_uplink_priv *uplink_priv; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct mlx5e_rep_priv *uplink_rpriv; struct metadata_dst *tun_dst; struct tunnel_match_key key; @@ -5471,6 +5472,8 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb struct net_device *dev; int err; + __set_bit(IP_TUNNEL_KEY_BIT, flags); + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; tun_id = tunnel_id >> ENC_OPTS_BITS; @@ -5503,14 +5506,14 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb case FLOW_DISSECTOR_KEY_IPV4_ADDRS: tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, TUNNEL_KEY, + key.enc_tp.dst, flags, key32_to_tunnel_id(key.enc_key_id.keyid), enc_opts.key.len); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, 0, TUNNEL_KEY, + key.enc_tp.dst, 0, flags, key32_to_tunnel_id(key.enc_key_id.keyid), enc_opts.key.len); break; @@ -5528,11 +5531,16 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; - if (enc_opts.key.len) + if (enc_opts.key.len) { + ip_tunnel_flags_zero(flags); + if (enc_opts.key.dst_opt_type) + __set_bit(enc_opts.key.dst_opt_type, flags); + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.key.data, enc_opts.key.len, - enc_opts.key.dst_opt_type); + flags); + } 
skb_dst_set(skb, (struct dst_entry *)tun_dst); dev = dev_get_by_index(&init_net, key.filter_ifindex); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index d67df358a52f..d761a1235994 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -27,23 +27,23 @@ mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev) static bool mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm_kern *parms) { - return !!(parms->i_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags); } static bool mlxsw_sp_ipip_parms6_has_ikey(const struct __ip6_tnl_parm *parms) { - return !!(parms->i_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags); } static bool mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm_kern *parms) { - return !!(parms->o_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags); } static bool mlxsw_sp_ipip_parms6_has_okey(const struct __ip6_tnl_parm *parms) { - return !!(parms->o_flags & TUNNEL_KEY); + return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags); } static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm_kern *parms) @@ -242,12 +242,15 @@ static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev) { struct ip_tunnel *tunnel = netdev_priv(ol_dev); - __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ bool inherit_ttl = tunnel->parms.iph.ttl == 0; bool inherit_tos = tunnel->parms.iph.tos & 0x1; + IP_TUNNEL_DECLARE_FLAGS(okflags) = { }; - return (tunnel->parms.i_flags & ~okflags) == 0 && - (tunnel->parms.o_flags & ~okflags) == 0 && + /* We can't offload any other features. */ + __set_bit(IP_TUNNEL_KEY_BIT, okflags); + + return ip_tunnel_flags_subset(tunnel->parms.i_flags, okflags) && + ip_tunnel_flags_subset(tunnel->parms.o_flags, okflags) && inherit_ttl && inherit_tos && mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); } @@ -443,10 +446,13 @@ static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev); bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; bool inherit_ttl = tparm.hop_limit == 0; - __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + IP_TUNNEL_DECLARE_FLAGS(okflags) = { }; + + /* We can't offload any other features. */ + __set_bit(IP_TUNNEL_KEY_BIT, okflags); - return (tparm.i_flags & ~okflags) == 0 && - (tparm.o_flags & ~okflags) == 0 && + return ip_tunnel_flags_subset(tparm.i_flags, okflags) && + ip_tunnel_flags_subset(tparm.o_flags, okflags) && inherit_ttl && inherit_tos && mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV6, ol_dev); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index f0cbc6b9b041..3de69b2eb5c4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -461,7 +461,8 @@ mlxsw_sp_span_entry_gretap4_parms(struct mlxsw_sp *mlxsw_sp, if (!(to_dev->flags & IFF_UP) || /* Reject tunnels with GRE keys, checksums, etc. */ - tparm.i_flags || tparm.o_flags || + !ip_tunnel_flags_empty(tparm.i_flags) || + !ip_tunnel_flags_empty(tparm.o_flags) || /* Require a fixed TTL and a TOS copied from the mirrored packet. */ inherit_ttl || !inherit_tos || /* A destination address may not be "any". 
*/ @@ -565,7 +566,8 @@ mlxsw_sp_span_entry_gretap6_parms(struct mlxsw_sp *mlxsw_sp, if (!(to_dev->flags & IFF_UP) || /* Reject tunnels with GRE keys, checksums, etc. */ - tparm.i_flags || tparm.o_flags || + !ip_tunnel_flags_empty(tparm.i_flags) || + !ip_tunnel_flags_empty(tparm.o_flags) || /* Require a fixed TTL and a TOS copied from the mirrored packet. */ inherit_ttl || !inherit_tos || /* A destination address may not be "any". */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2c3f62907958..aca2a7417af3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -396,6 +396,17 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, return 0; } +#define NFP_FL_CHECK(flag) ({ \ + IP_TUNNEL_DECLARE_FLAGS(__check) = { }; \ + __be16 __res; \ + \ + __set_bit(IP_TUNNEL_##flag##_BIT, __check); \ + __res = ip_tunnel_flags_to_be16(__check); \ + \ + BUILD_BUG_ON(__builtin_constant_p(__res) && \ + NFP_FL_TUNNEL_##flag != __res); \ +}) + static int nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, const struct flow_action_entry *act, @@ -410,6 +421,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + __be16 tun_flags; if (!IS_ENABLED(CONFIG_IPV6) && ipv6) return -EOPNOTSUPP; @@ -417,9 +429,10 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) return -EOPNOTSUPP; - BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || - NFP_FL_TUNNEL_KEY != TUNNEL_KEY || - NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); + NFP_FL_CHECK(CSUM); + NFP_FL_CHECK(KEY); + NFP_FL_CHECK(GENEVE_OPT); + if (ip_tun->options_len && (tun_type != NFP_FL_TUNNEL_GENEVE || !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { @@ -427,7 +440,9 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, return -EOPNOTSUPP; } - if (ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { + tun_flags = ip_tunnel_flags_to_be16(ip_tun->key.tun_flags); + if (!ip_tunnel_flags_is_be16_compat(ip_tun->key.tun_flags) || + (tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; @@ -442,7 +457,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); - if (ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) + if (tun_flags & NFP_FL_TUNNEL_KEY) set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { @@ -486,7 +501,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, } set_tun->tos = ip_tun->key.tos; - set_tun->tun_flags = ip_tun->key.tun_flags; + set_tun->tun_flags = tun_flags; if (tun_type == NFP_FL_TUNNEL_GENEVE) { set_tun->tun_proto = htons(ETH_P_TEB); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2f6739fe78af..163f94a5a58f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -225,10 +225,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; - flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | - (gnvh->critical ? 
TUNNEL_CRIT_OPT : 0); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); + __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), @@ -238,9 +239,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, goto drop; } /* Update tunnel dst according to Geneve options. */ + ip_tunnel_flags_zero(flags); + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, - TUNNEL_GENEVE_OPT); + flags); } else { /* Drop packets w/ critical options, * since we don't support any... @@ -745,14 +748,15 @@ static void geneve_build_header(struct genevehdr *geneveh, { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; - geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM); - geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT); + geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); + geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, + info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); } @@ -761,7 +765,7 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, bool xnet, int ip_hdr_len, bool inner_proto_inherit) { - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); struct genevehdr *gnvh; __be16 inner_proto; int min_headroom; @@ -878,7 +882,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (geneve->cfg.collect_md) { ttl = key->ttl; - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? 
+ htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); @@ -910,7 +915,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; } @@ -998,7 +1004,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; } #endif @@ -1297,7 +1304,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { - return !(info->key.tun_id || info->key.tun_flags || info->key.tos || + return !(info->key.tun_id || info->key.tos || + !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } @@ -1435,7 +1443,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], "Remote IPv6 address cannot be Multicast"); return -EINVAL; } - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], @@ -1510,7 +1518,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { @@ -1520,7 +1528,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) - info->key.tun_flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); @@ -1753,7 +1761,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, - !!(info->key.tun_flags & TUNNEL_CSUM))) + test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) @@ -1762,7 +1771,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, - !(info->key.tun_flags & TUNNEL_CSUM))) + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure; #endif } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 3495591a5c29..72ecf6cf809c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1584,7 +1584,8 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, tun_dst = (struct metadata_dst *)skb_dst(skb); if (tun_dst) { - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, + tun_dst->u.tun_info.key.tun_flags); tun_dst->u.tun_info.options_len = sizeof(*md); } if (gbp->dont_learn) @@ -1716,9 +1717,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto 
drop; if (vxlan_collect_metadata(vs)) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tun_dst; - tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), flags, key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) @@ -2403,14 +2406,14 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { if (info->options_len < sizeof(*md)) goto drop; md = ip_tunnel_info_opts(info); } ttl = info->key.ttl; tos = info->key.tos; - udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); @@ -2451,7 +2454,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, old_iph->frag_off & htons(IP_DF))) df = htons(IP_DF); } - } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { + } else if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, + info->key.tun_flags)) { df = htons(IP_DF); } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 1b7fae4c6b24..4160731dcb6e 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -198,7 +198,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -215,7 +215,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -230,7 +230,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { @@ -243,7 +243,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; - info->key.tun_flags = flags; + ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; @@ -259,7 +259,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, - __be16 flags, + const unsigned long *flags, __be64 tunnel_id, int md_size) { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 1a7131d6cb0e..9ab376d1a677 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -97,7 +97,7 @@ struct flow_dissector_key_enc_opts { * here but seems difficult to #include */ u8 len; - __be16 dst_opt_type; + u32 dst_opt_type; }; struct flow_dissector_key_keyid { diff --git a/include/net/gre.h b/include/net/gre.h index 4e209708b754..ccd293203284 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -49,67 +49,61 @@ static inline bool netif_is_ip6gretap(const struct net_device *dev) !strcmp(dev->rtnl_link_ops->kind, "ip6gretap"); } -static inline int gre_calc_hlen(__be16 o_flags) +static inline int gre_calc_hlen(const unsigned long *o_flags) { int addend = 4; - if (o_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, o_flags)) addend += 
4; - if (o_flags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, o_flags)) addend += 4; - if (o_flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, o_flags)) addend += 4; return addend; } -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) +static inline void gre_flags_to_tnl_flags(unsigned long *dst, __be16 flags) { - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; + IP_TUNNEL_DECLARE_FLAGS(res) = { }; + + __assign_bit(IP_TUNNEL_CSUM_BIT, res, flags & GRE_CSUM); + __assign_bit(IP_TUNNEL_ROUTING_BIT, res, flags & GRE_ROUTING); + __assign_bit(IP_TUNNEL_KEY_BIT, res, flags & GRE_KEY); + __assign_bit(IP_TUNNEL_SEQ_BIT, res, flags & GRE_SEQ); + __assign_bit(IP_TUNNEL_STRICT_BIT, res, flags & GRE_STRICT); + __assign_bit(IP_TUNNEL_REC_BIT, res, flags & GRE_REC); + __assign_bit(IP_TUNNEL_VERSION_BIT, res, flags & GRE_VERSION); + + ip_tunnel_flags_copy(dst, res); } -static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) +static inline __be16 gre_tnl_flags_to_gre_flags(const unsigned long *tflags) { __be16 flags = 0; - if (tflags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, tflags)) flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) + if (test_bit(IP_TUNNEL_ROUTING_BIT, tflags)) flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, tflags)) flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, tflags)) flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) + if (test_bit(IP_TUNNEL_STRICT_BIT, tflags)) flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) + if (test_bit(IP_TUNNEL_REC_BIT, tflags)) flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) + if (test_bit(IP_TUNNEL_VERSION_BIT, tflags)) flags |= GRE_VERSION; return flags; } static inline void gre_build_header(struct sk_buff *skb, int hdr_len, - __be16 flags, __be16 proto, + const unsigned long *flags, __be16 proto, __be32 key, __be32 seq) { + IP_TUNNEL_DECLARE_FLAGS(cond) = { }; struct gre_base_hdr *greh; skb_push(skb, hdr_len); @@ -120,18 +114,22 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, greh->flags = gre_tnl_flags_to_gre_flags(flags); greh->protocol = proto; - if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __set_bit(IP_TUNNEL_KEY_BIT, cond); + __set_bit(IP_TUNNEL_CSUM_BIT, cond); + __set_bit(IP_TUNNEL_SEQ_BIT, cond); + + if (ip_tunnel_flags_intersect(flags, cond)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (flags & TUNNEL_SEQ) { + if (test_bit(IP_TUNNEL_SEQ_BIT, flags)) { *ptr = seq; ptr--; } - if (flags & TUNNEL_KEY) { + if (test_bit(IP_TUNNEL_KEY_BIT, flags)) { *ptr = key; ptr--; } - if (flags & TUNNEL_CSUM && + if (test_bit(IP_TUNNEL_CSUM_BIT, flags) && !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 74b369bddf49..399592405c72 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -30,8 +30,8 @@ struct __ip6_tnl_parm { struct in6_addr laddr; /* local tunnel end-point address */ struct in6_addr raddr; /* remote tunnel end-point address */ - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 
o_key; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 20f0319ab149..ed8e48cc9054 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -36,6 +36,24 @@ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) +#define __ipt_flag_op(op, ...) \ + op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) + +#define IP_TUNNEL_DECLARE_FLAGS(...) \ + __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) + +#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) +#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) +#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) +#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) + +#define ip_tunnel_flags_empty(...) \ + __ipt_flag_op(bitmap_empty, __VA_ARGS__) +#define ip_tunnel_flags_intersect(...) \ + __ipt_flag_op(bitmap_intersects, __VA_ARGS__) +#define ip_tunnel_flags_subset(...) \ + __ipt_flag_op(bitmap_subset, __VA_ARGS__) + struct ip_tunnel_key { __be64 tun_id; union { @@ -48,11 +66,11 @@ struct ip_tunnel_key { struct in6_addr dst; } ipv6; } u; - __be16 tun_flags; - u8 tos; /* TOS for IPv4, TC for IPv6 */ - u8 ttl; /* TTL for IPv4, HL for IPv6 */ + IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; @@ -110,14 +128,14 @@ struct ip_tunnel_prl_entry { struct metadata_dst; -/* Kernel-side copy of ip_tunnel_parm */ +/* Kernel-side variant of ip_tunnel_parm */ struct ip_tunnel_parm_kern { char name[IFNAMSIZ]; - int link; - __be16 i_flags; - __be16 o_flags; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; + int link; struct iphdr iph; }; @@ -168,7 +186,7 @@ struct ip_tunnel { }; struct tnl_ptk_info { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; @@ -190,11 +208,77 @@ struct ip_tunnel_net { int type; }; +static inline void ip_tunnel_set_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + +static inline void ip_tunnel_clear_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + __ipt_flag_op(bitmap_andnot, flags, flags, present); +} + +static inline bool ip_tunnel_is_options_present(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + + return ip_tunnel_flags_intersect(flags, present); +} + +static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(supp) = { }; + + bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); + __set_bit(IP_TUNNEL_VTI_BIT, supp); + + return ip_tunnel_flags_subset(flags, supp); +} + +static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) +{ + ip_tunnel_flags_zero(dst); + + bitmap_write(dst, 
be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); + __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); +} + +static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) +{ + __be16 ret; + + ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); + if (test_bit(IP_TUNNEL_VTI_BIT, flags)) + ret |= VTI_ISVTI; + + return ret; +} + static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, + const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; @@ -204,7 +288,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, key->tos = tos; key->ttl = ttl; key->label = label; - key->tun_flags = tun_flags; + ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. @@ -225,12 +309,8 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb, { if (skb->mark) return false; - if (!info) - return true; - if (info->key.tun_flags & TUNNEL_NOCACHE) - return false; - return true; + return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@ -313,7 +393,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); @@ -529,12 +609,13 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); - info->key.tun_flags |= flags; + ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, + flags); } } @@ -578,7 +659,7 @@ static inline void ip_tunnel_info_opts_get(void *to, static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = 0; } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index d716214fe03d..a93dc51f6323 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -179,8 +179,8 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, - int md_size); + const unsigned long *flags, + __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 102119628ff5..838927dd73a4 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -161,6 +161,14 @@ enum { #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) +#ifndef __KERNEL__ +/* Historically, tunnel flags have been defined as __be16 and now there are + * no free bits left. It is strongly advised to switch the already existing + * userspace code to the new *_BIT definitions from down below, as __be16 + * can't be simply cast to a wider type on LE systems. All new flags and + * code must use *_BIT only. 
+ */ + #define TUNNEL_CSUM __cpu_to_be16(0x01) #define TUNNEL_ROUTING __cpu_to_be16(0x02) #define TUNNEL_KEY __cpu_to_be16(0x04) @@ -181,5 +189,30 @@ enum { #define TUNNEL_OPTIONS_PRESENT \ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ TUNNEL_GTP_OPT) +#endif + +enum { + IP_TUNNEL_CSUM_BIT = 0U, + IP_TUNNEL_ROUTING_BIT, + IP_TUNNEL_KEY_BIT, + IP_TUNNEL_SEQ_BIT, + IP_TUNNEL_STRICT_BIT, + IP_TUNNEL_REC_BIT, + IP_TUNNEL_VERSION_BIT, + IP_TUNNEL_NO_KEY_BIT, + IP_TUNNEL_DONT_FRAGMENT_BIT, + IP_TUNNEL_OAM_BIT, + IP_TUNNEL_CRIT_OPT_BIT, + IP_TUNNEL_GENEVE_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_VXLAN_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_NOCACHE_BIT, + IP_TUNNEL_ERSPAN_OPT_BIT, /* OPTIONS_PRESENT */ + IP_TUNNEL_GTP_OPT_BIT, /* OPTIONS_PRESENT */ + + IP_TUNNEL_VTI_BIT, + IP_TUNNEL_SIT_ISATAP_BIT = IP_TUNNEL_VTI_BIT, + + __IP_TUNNEL_FLAG_NUM, +}; #endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 81833ca7a2c7..a966a6ec8263 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -65,13 +65,14 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, { struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; int err; if (metadata) return -EEXIST; - metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, - key, 0); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, key, 0); if (!metadata) return -EINVAL; @@ -185,6 +186,7 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb, int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tunnel_dst; __be64 tunnel_id; int err; @@ -202,7 +204,8 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, return err; if (BR_INPUT_SKB_CB(skb)->backup_nhid) { - tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, flags, tunnel_id, 0); if (!tunnel_dst) return -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index 0c66e4a3fc5b..294670d3850d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4662,7 +4662,7 @@ set_compat: to->tunnel_tos = info->key.tos; to->tunnel_ttl = info->key.ttl; if (flags & BPF_F_TUNINFO_FLAGS) - to->tunnel_flags = info->key.tun_flags; + to->tunnel_flags = ip_tunnel_flags_to_be16(info->key.tun_flags); else to->tunnel_ext = 0; @@ -4705,7 +4705,7 @@ BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) int err; if (unlikely(!info || - !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { + !ip_tunnel_is_options_present(info->key.tun_flags))) { err = -ENOENT; goto err_clear; } @@ -4775,15 +4775,15 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, memset(info, 0, sizeof(*info)); info->mode = IP_TUNNEL_INFO_TX; - info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; - if (flags & BPF_F_DONT_FRAGMENT) - info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; - if (flags & BPF_F_ZERO_CSUM_TX) - info->key.tun_flags &= ~TUNNEL_CSUM; - if (flags & BPF_F_SEQ_NUMBER) - info->key.tun_flags |= TUNNEL_SEQ; - if (flags & BPF_F_NO_TUNNEL_KEY) - info->key.tun_flags &= ~TUNNEL_KEY; + __set_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); + __assign_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags, + flags & BPF_F_DONT_FRAGMENT); + __assign_bit(IP_TUNNEL_CSUM_BIT, 
info->key.tun_flags, + !(flags & BPF_F_ZERO_CSUM_TX)); + __assign_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags, + flags & BPF_F_SEQ_NUMBER); + __assign_bit(IP_TUNNEL_KEY_BIT, info->key.tun_flags, + !(flags & BPF_F_NO_TUNNEL_KEY)); info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; @@ -4821,13 +4821,15 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, { struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); + IP_TUNNEL_DECLARE_FLAGS(present) = { }; if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) return -EINVAL; if (unlikely(size > IP_TUNNEL_OPTS_MAX)) return -ENOMEM; - ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); + ip_tunnel_set_options_present(present); + ip_tunnel_info_opts_set(info, from, size, present); return 0; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 272f09251343..f82e9a7d3b37 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -455,17 +455,25 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { struct flow_dissector_key_enc_opts *enc_opt; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + u32 val; enc_opt = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS, target_container); - if (info->options_len) { - enc_opt->len = info->options_len; - ip_tunnel_info_opts_get(enc_opt->data, info); - enc_opt->dst_opt_type = info->key.tun_flags & - TUNNEL_OPTIONS_PRESENT; - } + if (!info->options_len) + return; + + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + + ip_tunnel_set_options_present(flags); + ip_tunnel_flags_and(flags, info->key.tun_flags, flags); + + val = find_next_bit(flags, __IP_TUNNEL_FLAG_NUM, + IP_TUNNEL_GENEVE_OPT_BIT); + enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? 
val : 0; } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 06e5572f296f..54984f3170a8 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -64,7 +64,7 @@ __bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, info->encap.type = TUNNEL_ENCAP_NONE; } - if (info->key.tun_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)) info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM; info->encap.sport = encap->sport; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 3757fd93523f..6701a98d9a9f 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -73,7 +73,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; - tpi->flags = gre_flags_to_tnl_flags(greh->flags); + gre_flags_to_tnl_flags(tpi->flags, greh->flags); hdr_len = gre_calc_hlen(tpi->flags); if (!pskb_may_pull(skb, nhs + hdr_len)) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8853c065a985..3c46a7fef8db 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -265,6 +265,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; + IP_TUNNEL_DECLARE_FLAGS(flags); struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -272,18 +273,20 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int ver; int len; + ip_tunnel_flags_copy(flags, tpi->flags); + itn = net_generic(net, erspan_net_id); iph = ip_hdr(skb); if (is_erspan_type1(gre_hdr_len)) { ver = 0; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, - tpi->flags | TUNNEL_NO_KEY, + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, 0); } else { ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, - tpi->flags | TUNNEL_KEY, + __set_bit(IP_TUNNEL_KEY_BIT, flags); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, tpi->key); } @@ -307,10 +310,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; - __be16 flags; - tpi->flags |= TUNNEL_KEY; - flags = tpi->flags; + __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, @@ -333,7 +335,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, ERSPAN_V2_MDSIZE); info = &tun_dst->u.tun_info; - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + info->key.tun_flags); info->options_len = sizeof(*md); } @@ -376,10 +379,13 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, tnl_params = &tunnel->parms.iph; if (tunnel->collect_md || tnl_params->daddr == 0) { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __be64 tun_id; - flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + ip_tunnel_flags_and(flags, tpi->flags, flags); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) @@ -459,12 +465,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); 
- __be16 flags = tunnel->parms.o_flags; + IP_TUNNEL_DECLARE_FLAGS(flags); + + ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -478,10 +487,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; int tunnel_hlen; - __be16 flags; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -495,14 +504,19 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, goto err_free_skb; /* Push Tunnel header. */ - if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto err_free_skb; - flags = tun_info->key.tun_flags & - (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags); + gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -516,6 +530,7 @@ err_free_skb: static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; @@ -531,7 +546,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) goto err_free_skb; if (tun_info->options_len < sizeof(*md)) goto err_free_skb; @@ -584,8 +599,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; } - gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + gre_build_header(skb, 8, flags, proto, 0, + htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -659,7 +675,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } - if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto free_skb; __gre_xmit(skb, dev, tnl_params, skb->protocol); @@ -701,7 +718,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, /* Push ERSPAN header */ if (tunnel->erspan_ver == 0) { proto = htons(ETH_P_ERSPAN); - tunnel->parms.o_flags &= ~TUNNEL_SEQ; + __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags); } else if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, @@ -716,7 +733,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, goto free_skb; } - tunnel->parms.o_flags &= 
~TUNNEL_KEY; + __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags); __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; @@ -739,7 +756,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + tunnel->parms.o_flags))) goto free_skb; if (skb_cow_head(skb, dev->needed_headroom)) @@ -757,7 +775,6 @@ free_skb: static void ipgre_link_update(struct net_device *dev, bool set_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); - __be16 flags; int len; len = tunnel->tun_hlen; @@ -773,10 +790,9 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) if (set_mtu) dev->mtu = max_t(int, dev->mtu - len, 68); - flags = tunnel->parms.o_flags; - - if (flags & TUNNEL_SEQ || - (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) || + (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && + tunnel->encap.type != TUNNEL_ENCAP_NONE)) { dev->features &= ~NETIF_F_GSO_SOFTWARE; dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; } else { @@ -789,17 +805,25 @@ static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { + __be16 i_flags, o_flags; int err; + if (!ip_tunnel_flags_is_be16_compat(p->i_flags) || + !ip_tunnel_flags_is_be16_compat(p->o_flags)) + return -EOVERFLOW; + + i_flags = ip_tunnel_flags_to_be16(p->i_flags); + o_flags = ip_tunnel_flags_to_be16(p->o_flags); + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || - ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) + ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p->i_flags = gre_flags_to_tnl_flags(p->i_flags); - p->o_flags = gre_flags_to_tnl_flags(p->o_flags); + gre_flags_to_tnl_flags(p->i_flags, i_flags); + gre_flags_to_tnl_flags(p->o_flags, o_flags); err = ip_tunnel_ctl(dev, p, cmd); if (err) @@ -808,15 +832,18 @@ static int ipgre_tunnel_ctl(struct net_device *dev, if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p->i_flags; - t->parms.o_flags = p->o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); - p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); + i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + ip_tunnel_flags_from_be16(p->i_flags, i_flags); + o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); + ip_tunnel_flags_from_be16(p->o_flags, o_flags); + return 0; } @@ -956,7 +983,6 @@ static void ipgre_tunnel_setup(struct net_device *dev) static void __gre_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel; - __be16 flags; tunnel = netdev_priv(dev); tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); @@ -968,14 +994,13 @@ static void __gre_tunnel_init(struct net_device *dev) dev->features |= GRE_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE_FEATURES; - flags = tunnel->parms.o_flags; - /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. 
*/ - if (flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags)) return; - if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE) + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && + tunnel->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -1148,10 +1173,12 @@ static int ipgre_netlink_parms(struct net_device *dev, parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); + gre_flags_to_tnl_flags(parms->i_flags, + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); + gre_flags_to_tnl_flags(parms->o_flags, + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1411,8 +1438,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], if (err < 0) return err; - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); ipgre_link_update(dev, !tb[IFLA_MTU]); @@ -1440,8 +1467,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], if (err < 0) return err; - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags); return 0; } @@ -1498,7 +1525,9 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm_kern *p = &t->parms; - __be16 o_flags = p->o_flags; + IP_TUNNEL_DECLARE_FLAGS(o_flags); + + ip_tunnel_flags_copy(o_flags, p->o_flags); if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, @@ -1546,7 +1575,7 @@ static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (t->erspan_ver <= 2) { if (t->erspan_ver != 0 && !t->collect_md) - t->parms.o_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) goto nla_put_failure; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d10f41f81463..f65170a28106 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -57,16 +57,12 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) } static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, - __be16 flags, __be32 key) + const unsigned long *flags, __be32 key) { - if (p->i_flags & TUNNEL_KEY) { - if (flags & TUNNEL_KEY) - return key == p->i_key; - else - /* key expected, none present */ - return false; - } else - return !(flags & TUNNEL_KEY); + if (!test_bit(IP_TUNNEL_KEY_BIT, flags)) + return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags); + + return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key; } /* Fallback tunnel: no source, no destination, no key, no options @@ -81,7 +77,7 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p, Given src, dst and key, find appropriate for input tunnel. 
*/ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key) { @@ -143,7 +139,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, } hlist_for_each_entry_rcu(t, head, hash_node) { - if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || + if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) && + t->parms.i_key != key) || t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) @@ -182,7 +179,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, else remote = 0; - if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) + if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) && + test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags)) i_key = 0; h = ip_tunnel_hash(i_key, remote); @@ -211,12 +209,14 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; + IP_TUNNEL_DECLARE_FLAGS(flags); __be32 key = parms->i_key; - __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); + ip_tunnel_flags_copy(flags, parms->i_flags); + hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && @@ -386,15 +386,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, } #endif - if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || - ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) != + test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) { DEV_STATS_INC(tunnel->dev, rx_crc_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } - if (tunnel->parms.i_flags&TUNNEL_SEQ) { - if (!(tpi->flags&TUNNEL_SEQ) || + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) { + if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { DEV_STATS_INC(tunnel->dev, rx_fifo_errors); DEV_STATS_INC(tunnel->dev, rx_errors); @@ -638,7 +638,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags)) df = htons(IP_DF); if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, key->u.ipv4.dst, true)) { @@ -928,10 +928,10 @@ int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags & VTI_ISVTI)) { - if (!(p->i_flags & TUNNEL_KEY)) + if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) { + if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags)) p->i_key = 0; - if (!(p->o_flags & TUNNEL_KEY)) + if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags)) p->o_key = 0; } @@ -1016,8 +1016,8 @@ bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, strscpy(kp->name, p.name); kp->link = p.link; - kp->i_flags = p.i_flags; - kp->o_flags = p.o_flags; + ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags); + ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags); kp->i_key = p.i_key; kp->o_key = p.o_key; memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph))); @@ -1030,10 +1030,14 @@ bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp) { struct ip_tunnel_parm p; + if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) || + !ip_tunnel_flags_is_be16_compat(kp->o_flags)) + return false; + strscpy(p.name, 
kp->name); p.link = kp->link; - p.i_flags = kp->i_flags; - p.o_flags = kp->o_flags; + p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags); + p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags); p.i_key = kp->i_key; p.o_key = kp->o_key; memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph))); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 98f136b07191..a3676155be78 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) { + IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; struct metadata_dst *res; struct ip_tunnel_info *dst, *src; @@ -144,10 +145,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, sizeof(struct in6_addr)); else dst->key.u.ipv4.dst = src->key.u.ipv4.src; - dst->key.tun_flags = src->key.tun_flags; + ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags); dst->mode = src->mode | IP_TUNNEL_INFO_TX; ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src), - src->options_len, 0); + src->options_len, tun_flags); return res; } @@ -497,7 +498,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr, opt->opt_class = nla_get_be16(attr); attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE]; opt->type = nla_get_u8(attr); - info->key.tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); } return sizeof(struct geneve_opt) + data_len; @@ -525,7 +526,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr, attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); md->gbp &= VXLAN_GBP_MASK; - info->key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct vxlan_metadata); @@ -574,7 +575,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr, set_hwid(&md->u.md2, nla_get_u8(attr)); } - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); } return sizeof(struct erspan_metadata); @@ -585,7 +586,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, { int err, rem, opt_len, opts_len = 0; struct nlattr *nla; - __be16 type = 0; + u32 type = 0; if (!attr) return 0; @@ -598,7 +599,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { switch (nla_type(nla)) { case LWTUNNEL_IP_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) return -EINVAL; opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len, extack); @@ -607,7 +608,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, opts_len += opt_len; if (opts_len > IP_TUNNEL_OPTS_MAX) return -EINVAL; - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; case LWTUNNEL_IP_OPTS_VXLAN: if (type) @@ -617,7 +618,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case LWTUNNEL_IP_OPTS_ERSPAN: if (type) @@ -627,7 +628,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; default: return -EINVAL; @@ -705,10 +706,16 @@ static int ip_tun_build_state(struct net *net, struct nlattr *attr, if 
(tb[LWTUNNEL_IP_TOS]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); - if (tb[LWTUNNEL_IP_FLAGS]) - tun_info->key.tun_flags |= - (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) & - ~TUNNEL_OPTIONS_PRESENT); + if (tb[LWTUNNEL_IP_FLAGS]) { + IP_TUNNEL_DECLARE_FLAGS(flags); + + ip_tunnel_flags_from_be16(flags, + nla_get_be16(tb[LWTUNNEL_IP_FLAGS])); + ip_tunnel_clear_options_present(flags); + + ip_tunnel_flags_or(tun_info->key.tun_flags, + tun_info->key.tun_flags, flags); + } tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = opt_len; @@ -812,18 +819,18 @@ static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type, struct nlattr *nest; int err = 0; - if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)) + if (!ip_tunnel_is_options_present(tun_info->key.tun_flags)) return 0; nest = nla_nest_start_noflag(skb, type); if (!nest) return -ENOMEM; - if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_geneve(skb, tun_info); - else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT) + else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_vxlan(skb, tun_info); - else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT) + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags)) err = ip_tun_fill_encap_opts_erspan(skb, tun_info); if (err) { @@ -846,7 +853,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) || + nla_put_be16(skb, LWTUNNEL_IP_FLAGS, + ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info)) return -ENOMEM; @@ -857,11 +865,11 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) { int opt_len; - if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)) + if (!ip_tunnel_is_options_present(info->key.tun_flags)) return 0; opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */ - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { struct geneve_opt *opt; int offset = 0; @@ -874,10 +882,10 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info) /* OPT_GENEVE_DATA */ offset += sizeof(*opt) + opt->length * 4; } - } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */ + nla_total_size(4); /* OPT_VXLAN_GBP */ - } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { struct erspan_metadata *md = ip_tunnel_info_opts(info); opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */ @@ -984,10 +992,17 @@ static int ip6_tun_build_state(struct net *net, struct nlattr *attr, if (tb[LWTUNNEL_IP6_TC]) tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); - if (tb[LWTUNNEL_IP6_FLAGS]) - tun_info->key.tun_flags |= - (nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) & - ~TUNNEL_OPTIONS_PRESENT); + if (tb[LWTUNNEL_IP6_FLAGS]) { + IP_TUNNEL_DECLARE_FLAGS(flags); + __be16 data; + + data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); + ip_tunnel_flags_from_be16(flags, data); + ip_tunnel_clear_options_present(flags); + + ip_tunnel_flags_or(tun_info->key.tun_flags, + tun_info->key.tun_flags, flags); + } tun_info->mode = 
IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; tun_info->options_len = opt_len; @@ -1008,7 +1023,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || - nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) || + nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, + ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) || ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info)) return -ENOMEM; @@ -1139,8 +1155,12 @@ void ip_tunnel_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); - if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + if (data[IFLA_IPTUN_FLAGS]) { + __be16 flags; + + flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + ip_tunnel_flags_from_be16(parms->i_flags, flags); + } if (data[IFLA_IPTUN_PROTO]) parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 7e595f619615..14536da9f5dc 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -51,8 +51,11 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, const struct iphdr *iph = ip_hdr(skb); struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, iph->daddr, 0); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) @@ -322,8 +325,11 @@ static int vti4_err(struct sk_buff *skb, u32 info) const struct iphdr *iph = (const struct iphdr *)skb->data; int protocol = iph->protocol; struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr, iph->saddr, 0); if (!tunnel) return -1; @@ -375,6 +381,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) static int vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) { + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; int err = 0; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { @@ -383,20 +390,26 @@ vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) return -EINVAL; } - if (!(p->i_flags & GRE_KEY)) + if (!ip_tunnel_flags_is_be16_compat(p->i_flags) || + !ip_tunnel_flags_is_be16_compat(p->o_flags)) + return -EOVERFLOW; + + if (!(ip_tunnel_flags_to_be16(p->i_flags) & GRE_KEY)) p->i_key = 0; - if (!(p->o_flags & GRE_KEY)) + if (!(ip_tunnel_flags_to_be16(p->o_flags) & GRE_KEY)) p->o_key = 0; - p->i_flags = VTI_ISVTI; + __set_bit(IP_TUNNEL_VTI_BIT, flags); + ip_tunnel_flags_copy(p->i_flags, flags); err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p->i_flags |= GRE_KEY; - p->o_flags |= GRE_KEY; + ip_tunnel_flags_from_be16(flags, GRE_KEY); + ip_tunnel_flags_or(p->i_flags, p->i_flags, flags); + ip_tunnel_flags_or(p->o_flags, p->o_flags, flags); } return 0; } @@ -541,7 +554,7 @@ static void vti_netlink_parms(struct nlattr *data[], if (!data) return; - parms->i_flags = VTI_ISVTI; + __set_bit(IP_TUNNEL_VTI_BIT, parms->i_flags); if (data[IFLA_VTI_LINK]) parms->link 
= nla_get_u32(data[IFLA_VTI_LINK]); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index db960cd08a3b..923a2ef68c2f 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,13 +130,16 @@ static int ipip_err(struct sk_buff *skb, u32 info) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; int err = 0; - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr, + iph->saddr, 0); if (!t) { err = -ENOENT; goto out; @@ -213,13 +216,16 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; + __set_bit(IP_TUNNEL_NO_KEY_BIT, flags); + iph = ip_hdr(skb); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr, + iph->daddr, 0); if (tunnel) { const struct tnl_ptk_info *tpi; @@ -238,7 +244,9 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; if (tunnel->collect_md) { - tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + ip_tunnel_flags_zero(flags); + + tun_dst = ip_tun_rx_dst(skb, flags, 0, 0); if (!tun_dst) return 0; ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info); @@ -340,7 +348,8 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd) } p->i_key = p->o_key = 0; - p->i_flags = p->o_flags = 0; + ip_tunnel_flags_zero(p->i_flags); + ip_tunnel_flags_zero(p->o_flags); return ip_tunnel_ctl(dev, p, cmd); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 860aff5f8599..e4e0fa869fa4 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -183,7 +183,8 @@ void udp_tunnel_sock_release(struct socket *sock) EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, int md_size) + const unsigned long *flags, + __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -199,7 +200,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, info->key.tp_src = udp_hdr(skb)->source; info->key.tp_dst = udp_hdr(skb)->dest; if (udp_hdr(skb)->check) - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); return tun_dst; } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ca7e77e84283..2146cf69d8e2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -496,11 +496,11 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) tpi->proto); if (tunnel) { if (tunnel->parms.collect_md) { + IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; __be64 tun_id; - __be16 flags; - flags = tpi->flags; + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0); @@ -548,14 +548,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if 
(tunnel->parms.collect_md) { struct erspan_metadata *pkt_md, *md; + IP_TUNNEL_DECLARE_FLAGS(flags); struct metadata_dst *tun_dst; struct ip_tunnel_info *info; unsigned char *gh; __be64 tun_id; - __be16 flags; - tpi->flags |= TUNNEL_KEY; - flags = tpi->flags; + __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags); + ip_tunnel_flags_copy(flags, tpi->flags); tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, @@ -577,7 +577,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, md2 = &md->u.md2; memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + info->key.tun_flags); info->options_len = sizeof(*md); ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); @@ -745,8 +746,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 protocol; - __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -778,8 +779,11 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; - flags = key->tun_flags & - (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); + ip_tunnel_flags_zero(flags); + __set_bit(IP_TUNNEL_CSUM_BIT, flags); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + ip_tunnel_flags_and(flags, flags, key->tun_flags); tun_hlen = gre_calc_hlen(flags); if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) @@ -788,19 +792,21 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) - : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? + htonl(atomic_fetch_inc(&tunnel->o_seqno)) : + 0); } else { if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; - flags = tunnel->parms.o_flags; + ip_tunnel_flags_copy(flags, tunnel->parms.o_flags); gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) - : 0); + test_bit(IP_TUNNEL_SEQ_BIT, flags) ? 
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : + 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -822,7 +828,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit); - err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags)); if (err) return -1; @@ -856,7 +863,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; - if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) + if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags))) return -1; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, @@ -883,7 +891,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit)) return -1; - err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); + err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT, + t->parms.o_flags)); if (err) return err; err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol); @@ -936,6 +945,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info = NULL; struct ip6_tnl *t = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; bool truncate = false; int encap_limit = -1; __u8 dsfield = false; @@ -979,7 +989,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; - t->parms.o_flags &= ~TUNNEL_KEY; + __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags); IPCB(skb)->flags = 0; /* For collect_md mode, derive fl6 from the tunnel key, @@ -1004,7 +1014,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; - if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) + if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_info->key.tun_flags)) goto tx_err; if (tun_info->options_len < sizeof(*md)) goto tx_err; @@ -1065,7 +1076,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. 
*/ - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); + __set_bit(IP_TUNNEL_SEQ_BIT, flags); + gre_build_header(skb, 8, flags, proto, 0, + htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1208,8 +1221,8 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.proto = p->proto; t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; - t->parms.i_flags = p->i_flags; - t->parms.o_flags = p->o_flags; + ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags); + ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags); t->parms.fwmark = p->fwmark; t->parms.erspan_ver = p->erspan_ver; t->parms.index = p->index; @@ -1238,8 +1251,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, p->link = u->link; p->i_key = u->i_key; p->o_key = u->o_key; - p->i_flags = gre_flags_to_tnl_flags(u->i_flags); - p->o_flags = gre_flags_to_tnl_flags(u->o_flags); + gre_flags_to_tnl_flags(p->i_flags, u->i_flags); + gre_flags_to_tnl_flags(p->o_flags, u->o_flags); memcpy(p->name, u->name, sizeof(u->name)); } @@ -1391,7 +1404,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, ipv6h->daddr = t->parms.raddr; p = (__be16 *)(ipv6h + 1); - p[0] = t->parms.o_flags; + p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags); p[1] = htons(type); /* @@ -1455,19 +1468,17 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); - __be16 flags; dev->features |= GRE6_FEATURES | NETIF_F_LLTX; dev->hw_features |= GRE6_FEATURES; - flags = nt->parms.o_flags; - /* TCP offload with GRE SEQ is not supported, nor can we support 2 * levels of outer headers requiring an update. 
*/ - if (flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags)) return; - if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE) + if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) && + nt->encap.type != TUNNEL_ENCAP_NONE) return; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -1792,12 +1803,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = gre_flags_to_tnl_flags( - nla_get_be16(data[IFLA_GRE_IFLAGS])); + gre_flags_to_tnl_flags(parms->i_flags, + nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = gre_flags_to_tnl_flags( - nla_get_be16(data[IFLA_GRE_OFLAGS])); + gre_flags_to_tnl_flags(parms->o_flags, + nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -2144,11 +2155,13 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; - __be16 o_flags = p->o_flags; + IP_TUNNEL_DECLARE_FLAGS(o_flags); + + ip_tunnel_flags_copy(o_flags, p->o_flags); if (p->erspan_ver == 1 || p->erspan_ver == 2) { if (!p->collect_md) - o_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, o_flags); if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) goto nla_put_failure; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e9cc315832cb..57bb3b3ea0c5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -798,17 +798,15 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, const struct ipv6hdr *ipv6h; int nh, err; - if ((!(tpi->flags & TUNNEL_CSUM) && - (tunnel->parms.i_flags & TUNNEL_CSUM)) || - ((tpi->flags & TUNNEL_CSUM) && - !(tunnel->parms.i_flags & TUNNEL_CSUM))) { + if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) != + test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) { DEV_STATS_INC(tunnel->dev, rx_crc_errors); DEV_STATS_INC(tunnel->dev, rx_errors); goto drop; } - if (tunnel->parms.i_flags & TUNNEL_SEQ) { - if (!(tpi->flags & TUNNEL_SEQ) || + if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) { + if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { DEV_STATS_INC(tunnel->dev, rx_fifo_errors); @@ -946,7 +944,9 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; if (t->parms.collect_md) { - tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; + + tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0); if (!tun_dst) goto drop; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 894e6a4c4be3..83b195f09561 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -207,7 +207,7 @@ static int ipip6_tunnel_create(struct net_device *dev) __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); - if ((__force u16)t->parms.i_flags & SIT_ISATAP) + if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags)) dev->priv_flags |= IFF_ISATAP; dev->rtnl_link_ops = &sit_link_ops; @@ -1700,7 +1700,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || - nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) || + nla_put_be16(skb, IFLA_IPTUN_FLAGS, + ip_tunnel_flags_to_be16(parm->i_flags)) || nla_put_u32(skb, IFLA_IPTUN_FWMARK, 
tunnel->fwmark)) goto nla_put_failure; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a2c16b501087..c7a8a08b7308 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1550,6 +1550,7 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, if (!dest) goto unk; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 type; /* Only support version 0 and C (csum) */ @@ -1560,7 +1561,10 @@ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, if (type != htons(ETH_P_IP)) goto unk; *proto = IPPROTO_IPIP; - return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags)); + + gre_flags_to_tnl_flags(flags, greh->flags); + + return gre_calc_hlen(flags); } unk: diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65e0259178da..39b5fd6bbf65 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -390,10 +390,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < 68) { @@ -553,10 +553,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < IPV6_MIN_MTU) { @@ -1082,11 +1082,11 @@ ipvs_gre_encap(struct net *net, struct sk_buff *skb, { __be16 proto = *next_protocol == IPPROTO_IPIP ? 
htons(ETH_P_IP) : htons(ETH_P_IPV6); - __be16 tflags = 0; + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t hdrlen; if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); hdrlen = gre_calc_hlen(tflags); gre_build_header(skb, hdrlen, tflags, proto, 0, 0); @@ -1165,11 +1165,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom += sizeof(struct udphdr) + gue_hdrlen; } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t gre_hdrlen; - __be16 tflags = 0; if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); gre_hdrlen = gre_calc_hlen(tflags); max_headroom += gre_hdrlen; @@ -1310,11 +1310,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, max_headroom += sizeof(struct udphdr) + gue_hdrlen; } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; size_t gre_hdrlen; - __be16 tflags = 0; if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) - tflags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tflags); gre_hdrlen = gre_calc_hlen(tflags); max_headroom += gre_hdrlen; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index f735d79d8be5..60a76e6e348e 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -174,8 +174,8 @@ struct nft_tunnel_opts { struct erspan_metadata erspan; u8 data[IP_TUNNEL_OPTS_MAX]; } u; + IP_TUNNEL_DECLARE_FLAGS(flags); u32 len; - __be16 flags; }; struct nft_tunnel_obj { @@ -271,7 +271,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP])); opts->len = sizeof(struct vxlan_metadata); - opts->flags = TUNNEL_VXLAN_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags); return 0; } @@ -325,7 +326,8 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, opts->u.erspan.version = version; opts->len = sizeof(struct erspan_metadata); - opts->flags = TUNNEL_ERSPAN_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags); return 0; } @@ -366,7 +368,8 @@ static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, opt->length = data_len / 4; opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]); opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]); - opts->flags = TUNNEL_GENEVE_OPT; + ip_tunnel_flags_zero(opts->flags); + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags); return 0; } @@ -385,8 +388,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct nft_tunnel_opts *opts) { struct nlattr *nla; - __be16 type = 0; int err, rem; + u32 type = 0; err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX, nft_tunnel_opts_policy, NULL); @@ -401,7 +404,7 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, err = nft_tunnel_obj_vxlan_init(nla, opts); if (err) return err; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case NFTA_TUNNEL_KEY_OPTS_ERSPAN: if (type) @@ -409,15 +412,15 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, err = nft_tunnel_obj_erspan_init(nla, opts); if (err) return err; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; case NFTA_TUNNEL_KEY_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) return -EINVAL; err = nft_tunnel_obj_geneve_init(nla, opts); if (err) 
return err; - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; default: return -EOPNOTSUPP; @@ -454,7 +457,9 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, memset(&info, 0, sizeof(info)); info.mode = IP_TUNNEL_INFO_TX; info.key.tun_id = key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID])); - info.key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; + __set_bit(IP_TUNNEL_KEY_BIT, info.key.tun_flags); + __set_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags); + __set_bit(IP_TUNNEL_NOCACHE_BIT, info.key.tun_flags); if (tb[NFTA_TUNNEL_KEY_IP]) { err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info); @@ -483,11 +488,12 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX) - info.key.tun_flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, info.key.tun_flags); if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT) - info.key.tun_flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, + info.key.tun_flags); if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER) - info.key.tun_flags |= TUNNEL_SEQ; + __set_bit(IP_TUNNEL_SEQ_BIT, info.key.tun_flags); } if (tb[NFTA_TUNNEL_KEY_TOS]) info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]); @@ -583,7 +589,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, if (!nest) return -1; - if (opts->flags & TUNNEL_VXLAN_OPT) { + if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, opts->flags)) { inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN); if (!inner) goto failure; @@ -591,7 +597,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, htonl(opts->u.vxlan.gbp))) goto inner_failure; nla_nest_end(skb, inner); - } else if (opts->flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, opts->flags)) { inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN); if (!inner) goto failure; @@ -613,7 +619,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, break; } nla_nest_end(skb, inner); - } else if (opts->flags & TUNNEL_GENEVE_OPT) { + } else if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, opts->flags)) { struct geneve_opt *opt; int offset = 0; @@ -658,11 +664,11 @@ static int nft_tunnel_flags_dump(struct sk_buff *skb, { u32 flags = 0; - if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_DONT_FRAGMENT; - if (!(info->key.tun_flags & TUNNEL_CSUM)) + if (!test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_ZERO_CSUM_TX; - if (info->key.tun_flags & TUNNEL_SEQ) + if (test_bit(IP_TUNNEL_SEQ_BIT, info->key.tun_flags)) flags |= NFT_TUNNEL_F_SEQ_NUMBER; if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ebc5728aab4e..f224d9bcea5e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -152,6 +152,13 @@ static void update_range(struct sw_flow_match *match, sizeof((match)->key->field)); \ } while (0) +#define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \ + update_range(match, offsetof(struct sw_flow_key, field), \ + bitmap_size(nbits), is_mask); \ + bitmap_copy(is_mask ? 
(match)->mask->key.field : (match)->key->field, \ + value_p, nbits); \ +}) + static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { @@ -670,8 +677,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, bool log) { bool ttl = false, ipv4 = false, ipv6 = false; + IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { }; bool info_bridge_mode = false; - __be16 tun_flags = 0; int opts_type = 0; struct nlattr *a; int rem; @@ -697,7 +704,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_ID: SW_FLOW_KEY_PUT(match, tun_key.tun_id, nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, @@ -729,10 +736,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ttl = true; break; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_TP_SRC: SW_FLOW_KEY_PUT(match, tun_key.tp_src, @@ -743,7 +750,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, nla_get_be16(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_OAM: - tun_flags |= TUNNEL_OAM; + __set_bit(IP_TUNNEL_OAM_BIT, tun_flags); break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: if (opts_type) { @@ -755,7 +762,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: @@ -768,7 +775,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_PAD: @@ -784,7 +791,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, if (err) return err; - tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags); opts_type = type; break; case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE: @@ -798,7 +805,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } } - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags, + __IP_TUNNEL_FLAG_NUM, is_mask); if (is_mask) SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); else @@ -823,13 +831,15 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, } if (ipv4) { if (info_bridge_mode) { + __clear_bit(IP_TUNNEL_KEY_BIT, tun_flags); + if (match->key->tun_key.u.ipv4.src || match->key->tun_key.u.ipv4.dst || match->key->tun_key.tp_src || match->key->tun_key.tp_dst || match->key->tun_key.ttl || match->key->tun_key.tos || - tun_flags & ~TUNNEL_KEY) { + !ip_tunnel_flags_empty(tun_flags)) { OVS_NLERR(log, "IPv4 tun info is not correct"); return -EINVAL; } @@ -874,7 +884,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, const void *tun_opts, int swkey_tun_opts_len, unsigned short tun_proto, u8 mode) { - if (output->tun_flags & TUNNEL_KEY && + if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; @@ -910,10 +920,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) return -EMSGSIZE; - if ((output->tun_flags 
& TUNNEL_DONT_FRAGMENT) && + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && + if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (output->tp_src && @@ -922,18 +932,20 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, if (output->tp_dst && nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst)) return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_OAM) && + if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (swkey_tun_opts_len) { - if (output->tun_flags & TUNNEL_GENEVE_OPT && + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_VXLAN_OPT && + else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, + output->tun_flags) && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + output->tun_flags) && nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, swkey_tun_opts_len, tun_opts)) return -EMSGSIZE; @@ -2029,7 +2041,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; - if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) + if (ip_tunnel_is_options_present(output->tun_key.tun_flags)) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, @@ -2752,7 +2764,8 @@ static int validate_geneve_opts(struct sw_flow_key *key) opts_len -= len; } - key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; + if (crit_opt) + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags); return 0; } @@ -2760,6 +2773,7 @@ static int validate_geneve_opts(struct sw_flow_key *key) static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { + IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { }; struct sw_flow_match match; struct sw_flow_key key; struct metadata_dst *tun_dst; @@ -2767,9 +2781,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct ovs_tunnel_info *ovs_tun; struct nlattr *a; int err = 0, start, opts_type; - __be16 dst_opt_type; - dst_opt_type = 0; ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) @@ -2781,13 +2793,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, err = validate_geneve_opts(&key); if (err < 0) return err; - dst_opt_type = TUNNEL_GENEVE_OPT; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: - dst_opt_type = TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type); break; case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - dst_opt_type = TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type); break; } } diff --git a/net/psample/psample.c b/net/psample/psample.c index ddd211a151d0..a5d9b8446f77 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -221,7 +221,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; - if (tun_key->tun_flags & TUNNEL_KEY && + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags) && nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, PSAMPLE_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; @@ -257,10 +257,10 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_CSUM) && + if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; if (tun_key->tp_src && @@ -269,15 +269,16 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, if (tun_key->tp_dst && nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) return -EMSGSIZE; - if ((tun_key->tun_flags & TUNNEL_OAM) && + if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags) && nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; if (tun_opts_len) { - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags) && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; - else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_key->tun_flags) && nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, tun_opts_len, tun_opts)) return -EMSGSIZE; @@ -314,7 +315,7 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) int tun_opts_len = tun_info->options_len; int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ - if (tun_key->tun_flags & TUNNEL_KEY) + if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags)) sum += nla_total_size_64bit(sizeof(u64)); if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) @@ 
-337,20 +338,21 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) if (tun_key->tos) sum += nla_total_size(sizeof(u8)); sum += nla_total_size(sizeof(u8)); /* TTL */ - if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_key->tun_flags)) sum += nla_total_size(0); - if (tun_key->tun_flags & TUNNEL_CSUM) + if (test_bit(IP_TUNNEL_CSUM_BIT, tun_key->tun_flags)) sum += nla_total_size(0); if (tun_key->tp_src) sum += nla_total_size(sizeof(u16)); if (tun_key->tp_dst) sum += nla_total_size(sizeof(u16)); - if (tun_key->tun_flags & TUNNEL_OAM) + if (test_bit(IP_TUNNEL_OAM_BIT, tun_key->tun_flags)) sum += nla_total_size(0); if (tun_opts_len) { - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_key->tun_flags)) sum += nla_total_size(tun_opts_len); - else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) + else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, + tun_key->tun_flags)) sum += nla_total_size(tun_opts_len); } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 1536f8b16f1b..af7c99845948 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -230,7 +230,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, nla_for_each_attr(attr, head, len, rem) { switch (nla_type(attr)) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: - if (type && type != TUNNEL_GENEVE_OPT) { + if (type && type != IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } @@ -247,7 +247,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, dst_len -= opt_len; dst += opt_len; } - type = TUNNEL_GENEVE_OPT; + type = IP_TUNNEL_GENEVE_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: if (type) { @@ -259,7 +259,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_VXLAN_OPT; + type = IP_TUNNEL_VXLAN_OPT_BIT; break; case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: if (type) { @@ -271,7 +271,7 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, if (opt_len < 0) return opt_len; opts_len += opt_len; - type = TUNNEL_ERSPAN_OPT; + type = IP_TUNNEL_ERSPAN_OPT_BIT; break; } } @@ -302,7 +302,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, switch (nla_type(nla_data(nla))) { case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_GENEVE_OPT; + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -310,7 +310,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_VXLAN_OPT; + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -318,7 +318,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info, #endif case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN: #if IS_ENABLED(CONFIG_INET) - info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info), opts_len, extack); #else @@ -363,6 +363,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct 
tcf_tunnel_key_params *params_new; + IP_TUNNEL_DECLARE_FLAGS(flags) = { }; struct metadata_dst *metadata = NULL; struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; @@ -371,7 +372,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, __be16 dst_port = 0; __be64 key_id = 0; int opts_len = 0; - __be16 flags = 0; u8 tos, ttl; int ret = 0; u32 index; @@ -412,16 +412,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]); key_id = key32_to_tunnel_id(key32); - flags = TUNNEL_KEY; + __set_bit(IP_TUNNEL_KEY_BIT, flags); } - flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, flags); if (tb[TCA_TUNNEL_KEY_NO_CSUM] && nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM])) - flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, flags); if (nla_get_flag(tb[TCA_TUNNEL_KEY_NO_FRAG])) - flags |= TUNNEL_DONT_FRAGMENT; + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, flags); if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); @@ -663,15 +663,15 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!start) return -EMSGSIZE; - if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_geneve_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { + } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_vxlan_opts_dump(skb, info); if (err) goto err_out; - } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) { + } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) { err = tunnel_key_erspan_opts_dump(skb, info); if (err) goto err_out; @@ -741,7 +741,7 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, struct ip_tunnel_key *key = &info->key; __be32 key_id = tunnel_id_to_key32(key->tun_id); - if (((key->tun_flags & TUNNEL_KEY) && + if ((test_bit(IP_TUNNEL_KEY_BIT, key->tun_flags) && nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) || tunnel_key_dump_addresses(skb, &params->tcft_enc_metadata->u.tun_info) || @@ -749,8 +749,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) || nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM, - !(key->tun_flags & TUNNEL_CSUM)) || - ((key->tun_flags & TUNNEL_DONT_FRAGMENT) && + !test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) || + (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) && nla_put_flag(skb, TCA_TUNNEL_KEY_NO_FRAG)) || tunnel_key_opts_dump(skb, info)) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e1314674b4a9..ae14d649140f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1454,12 +1454,13 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, switch (nla_type(nla_opt_key)) { case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: if (key->enc_opts.dst_opt_type && - key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) { + key->enc_opts.dst_opt_type != + IP_TUNNEL_GENEVE_OPT_BIT) { NL_SET_ERR_MSG(extack, "Duplicate type for geneve options"); return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1470,7 +1471,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask
attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GENEVE_OPT_BIT; option_len = fl_set_geneve_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1489,7 +1490,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1500,7 +1501,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_VXLAN_OPT_BIT; option_len = fl_set_vxlan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1519,7 +1520,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1530,7 +1531,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_ERSPAN_OPT_BIT; option_len = fl_set_erspan_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -1550,7 +1551,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } option_len = 0; - key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + key->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_key, key, key_depth, option_len, extack); @@ -1561,7 +1562,7 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, /* At the same time we need to parse through the mask * in order to verify exact and mask attribute lengths. */ - mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + mask->enc_opts.dst_opt_type = IP_TUNNEL_GTP_OPT_BIT; option_len = fl_set_gtp_opt(nla_opt_msk, mask, msk_depth, option_len, extack); @@ -3202,22 +3203,22 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, goto nla_put_failure; switch (enc_opts->dst_opt_type) { - case TUNNEL_GENEVE_OPT: + case IP_TUNNEL_GENEVE_OPT_BIT: err = fl_dump_key_geneve_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_VXLAN_OPT: + case IP_TUNNEL_VXLAN_OPT_BIT: err = fl_dump_key_vxlan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_ERSPAN_OPT: + case IP_TUNNEL_ERSPAN_OPT_BIT: err = fl_dump_key_erspan_opt(skb, enc_opts); if (err) goto nla_put_failure; break; - case TUNNEL_GTP_OPT: + case IP_TUNNEL_GTP_OPT_BIT: err = fl_dump_key_gtp_opt(skb, enc_opts); if (err) goto nla_put_failure; -- cgit v1.2.3 From 2312dfdfab34884b0403c8a370a0e12910fe41ef Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Wed, 27 Mar 2024 16:23:57 +0100 Subject: ice: refactor ICE_TC_FLWR_FIELD_ENC_OPTS FLOW_DISSECTOR_KEY_ENC_OPTS can be used for multiple headers, but currently it is treated as GTP-exclusive in ice. Rename ICE_TC_FLWR_FIELD_ENC_OPTS to ICE_TC_FLWR_FIELD_GTP_OPTS and check for tunnel type earlier. 
After this refactor, it is easier to add new headers using FLOW_DISSECTOR_KEY_ENC_OPTS - instead of checking tunnel type in ice_tc_count_lkups() and ice_tc_fill_tunnel_outer(), it needs to be checked only once, in ice_parse_tunnel_attr(). Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 10 +++++----- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index b890410a2bc0..80797db9f2b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -35,7 +35,7 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, if (flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC) lkups_cnt++; - if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS) + if (flags & ICE_TC_FLWR_FIELD_GTP_OPTS) lkups_cnt++; if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | @@ -219,8 +219,7 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } - if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS && - (fltr->tunnel_type == TNL_GTPU || fltr->tunnel_type == TNL_GTPC)) { + if (flags & ICE_TC_FLWR_FIELD_GTP_OPTS) { list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type); if (fltr->gtp_pdu_info_masks.pdu_type) { @@ -1401,7 +1400,8 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, } } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS) && + (fltr->tunnel_type == TNL_GTPU || fltr->tunnel_type == TNL_GTPC)) { struct flow_match_enc_opts match; flow_rule_match_enc_opts(rule, &match); @@ -1412,7 +1412,7 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, memcpy(&fltr->gtp_pdu_info_masks, &match.mask->data[0], sizeof(struct gtp_pdu_session_info)); - fltr->flags |= ICE_TC_FLWR_FIELD_ENC_OPTS; + fltr->flags |= ICE_TC_FLWR_FIELD_GTP_OPTS; } return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 65d387163a46..5d188ad7517a 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -22,7 +22,7 @@ #define ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT BIT(15) #define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16) #define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) -#define ICE_TC_FLWR_FIELD_ENC_OPTS BIT(18) +#define ICE_TC_FLWR_FIELD_GTP_OPTS BIT(18) #define ICE_TC_FLWR_FIELD_CVLAN BIT(19) #define ICE_TC_FLWR_FIELD_PPPOE_SESSID BIT(20) #define ICE_TC_FLWR_FIELD_PPP_PROTO BIT(21) -- cgit v1.2.3 From 784feaa65dfd2695f837842bcd151db0add4cb17 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Wed, 27 Mar 2024 16:23:58 +0100 Subject: ice: Add support for PFCP hardware offload in switchdev Add support for creating PFCP filters in switchdev mode. Add support for parsing PFCP-specific tc options: S flag and SEID. To create a PFCP filter, a special netdev must be created and passed to tc command: ip link add pfcp0 type pfcp tc filter add dev eth0 ingress prio 1 flower pfcp_opts \ 1:123/ff:fffffffffffffff0 skip_hw action mirred egress redirect \ dev pfcp0 Changes in iproute2 [1] are required to be able to use pfcp_opts in tc. ICE COMMS package is required to create a filter as it contains PFCP profiles. 
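For illustration only, a minimal user-space sketch of the header fields this match operates on follows; it is not part of the series or of iproute2. The struct and helper names (pfcp_hdr_example, pfcp_is_session) are invented for the example, but the field layout mirrors struct ice_pfcp_hdr added above, and the 0x01 mask is the same one ice_tc_fill_tunnel_outer() applies to hdr_m->flags, i.e. only the S bit of the PFCP flags octet is matched together with the 64-bit SEID.

  #include <stdint.h>
  #include <stdio.h>
  #include <arpa/inet.h>

  /* Field layout mirrors struct ice_pfcp_hdr from this series. */
  struct pfcp_hdr_example {
          uint8_t  flags;    /* version(3) | spare(2) | FO | MP | S */
          uint8_t  msg_type;
          uint16_t length;   /* big endian on the wire */
          uint64_t seid;     /* meaningful only for session messages (S == 1) */
          uint32_t seq;
          uint8_t  spare;
  } __attribute__((packed));

  /* Same 0x01 mask the patch applies to hdr_m->flags. */
  static int pfcp_is_session(const struct pfcp_hdr_example *h)
  {
          return h->flags & 0x01;
  }

  int main(void)
  {
          /* 0x21 is taken from the pfcp_session_ipv4 dummy packet below:
           * PFCP version 1 with the S (session) bit set.
           */
          struct pfcp_hdr_example h = { .flags = 0x21, .msg_type = 0x01,
                                        .length = htons(0x000c) };

          printf("S flag: %d (1 => SEID present and matchable)\n",
                 pfcp_is_session(&h));
          return 0;
  }

With the rule from the example above, /ff on the type part keeps the S-flag comparison exact, while the fffffffffffffff0 SEID mask leaves the low four bits of the SEID wildcarded.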
Link: https://lore.kernel.org/netdev/20230614091758.11180-1-marcin.szycik@linux.intel.com [1] Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_ddp.c | 9 +++ drivers/net/ethernet/intel/ice/ice_flex_type.h | 4 +- drivers/net/ethernet/intel/ice/ice_protocol_type.h | 12 +++ drivers/net/ethernet/intel/ice/ice_switch.c | 85 ++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 58 +++++++++++++-- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 6 ++ 7 files changed, 169 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index fc91c4d41186..2ffa11fa2df1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -721,6 +721,12 @@ static bool ice_is_gtp_c_profile(u16 prof_idx) } } +static bool ice_is_pfcp_profile(u16 prof_idx) +{ + return prof_idx >= ICE_PROFID_IPV4_PFCP_NODE && + prof_idx <= ICE_PROFID_IPV6_PFCP_SESSION; +} + /** * ice_get_sw_prof_type - determine switch profile type * @hw: pointer to the HW structure @@ -738,6 +744,9 @@ static enum ice_prof_type ice_get_sw_prof_type(struct ice_hw *hw, if (ice_is_gtp_u_profile(prof_idx)) return ICE_PROF_TUN_GTPU; + if (ice_is_pfcp_profile(prof_idx)) + return ICE_PROF_TUN_PFCP; + for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) { /* UDP tunnel will have UDP_OF protocol ID and VNI offset */ if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF && diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index d427a79d001a..817beca591e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -93,6 +93,7 @@ enum ice_tunnel_type { TNL_GRETAP, TNL_GTPC, TNL_GTPU, + TNL_PFCP, __TNL_TYPE_CNT, TNL_LAST = 0xFF, TNL_ALL = 0xFF, @@ -358,7 +359,8 @@ enum ice_prof_type { ICE_PROF_TUN_GRE = 0x4, ICE_PROF_TUN_GTPU = 0x8, ICE_PROF_TUN_GTPC = 0x10, - ICE_PROF_TUN_ALL = 0x1E, + ICE_PROF_TUN_PFCP = 0x20, + ICE_PROF_TUN_ALL = 0x3E, ICE_PROF_ALL = 0xFF, }; diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index f6f27361c3cf..755a9c55267c 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -43,6 +43,7 @@ enum ice_protocol_type { ICE_NVGRE, ICE_GTP, ICE_GTP_NO_PAY, + ICE_PFCP, ICE_PPPOE, ICE_L2TPV3, ICE_VLAN_EX, @@ -61,6 +62,7 @@ enum ice_sw_tunnel_type { ICE_SW_TUN_NVGRE, ICE_SW_TUN_GTPU, ICE_SW_TUN_GTPC, + ICE_SW_TUN_PFCP, ICE_ALL_TUNNELS /* All tunnel types including NVGRE */ }; @@ -202,6 +204,15 @@ struct ice_udp_gtp_hdr { u8 rsvrd; }; +struct ice_pfcp_hdr { + u8 flags; + u8 msg_type; + __be16 length; + __be64 seid; + __be32 seq; + u8 spare; +} __packed __aligned(__alignof__(u16)); + struct ice_pppoe_hdr { u8 rsrvd_ver_type; u8 rsrvd_code; @@ -418,6 +429,7 @@ union ice_prot_hdr { struct ice_udp_tnl_hdr tnl_hdr; struct ice_nvgre_hdr nvgre_hdr; struct ice_udp_gtp_hdr gtp_hdr; + struct ice_pfcp_hdr pfcp_hdr; struct ice_pppoe_hdr pppoe_hdr; struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr; struct ice_hw_metadata metadata; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index f1f9d6b7b5c1..606c2419182a 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ 
b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -42,6 +42,7 @@ enum { ICE_PKT_KMALLOC = BIT(9), ICE_PKT_PPPOE = BIT(10), ICE_PKT_L2TPV3 = BIT(11), + ICE_PKT_PFCP = BIT(12), }; struct ice_dummy_pkt_offsets { @@ -1110,6 +1111,77 @@ ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = { 0x00, 0x00, }; +ICE_DECLARE_PKT_OFFSETS(pfcp_session_ipv4) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_ILOS, 34 }, + { ICE_PFCP, 42 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pfcp_session_ipv4) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x2c, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x22, 0x65, /* ICE_UDP_ILOS 34 */ + 0x00, 0x18, 0x00, 0x00, + + 0x21, 0x01, 0x00, 0x0c, /* ICE_PFCP 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +ICE_DECLARE_PKT_OFFSETS(pfcp_session_ipv6) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_UDP_ILOS, 54 }, + { ICE_PFCP, 62 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(pfcp_session_ipv6) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xdd, /* ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */ + 0x00, 0x10, 0x11, 0x00, /* Next header UDP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x22, 0x65, /* ICE_UDP_ILOS 54 */ + 0x00, 0x18, 0x00, 0x00, + + 0x21, 0x01, 0x00, 0x0c, /* ICE_PFCP 62 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, @@ -1343,6 +1415,8 @@ static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU), ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6), ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC), + ICE_PKT_PROFILE(pfcp_session_ipv6, ICE_PKT_PFCP | ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(pfcp_session_ipv4, ICE_PKT_PFCP), ICE_PKT_PROFILE(pppoe_ipv6_udp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP), ICE_PKT_PROFILE(pppoe_ipv6_tcp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6), @@ -4532,6 +4606,7 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { ICE_PROTOCOL_ENTRY(ICE_NVGRE, 0, 2, 4, 6), ICE_PROTOCOL_ENTRY(ICE_GTP, 8, 10, 12, 14, 16, 18, 20, 22), ICE_PROTOCOL_ENTRY(ICE_GTP_NO_PAY, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_PFCP, 8, 10, 12, 14, 16, 18, 20, 22), ICE_PROTOCOL_ENTRY(ICE_PPPOE, 0, 2, 4, 6), ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10), ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0), @@ -4565,6 +4640,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_NVGRE, ICE_GRE_OF_HW }, { ICE_GTP, ICE_UDP_OF_HW }, { ICE_GTP_NO_PAY, ICE_UDP_ILOS_HW }, + { ICE_PFCP, ICE_UDP_ILOS_HW }, { ICE_PPPOE, ICE_PPPOE_HW }, { ICE_L2TPV3, ICE_L2TPV3_HW }, { ICE_VLAN_EX, ICE_VLAN_OF_HW }, @@ -5272,6 +5348,9 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, case ICE_SW_TUN_GTPC: 
prof_type = ICE_PROF_TUN_GTPC; break; + case ICE_SW_TUN_PFCP: + prof_type = ICE_PROF_TUN_PFCP; + break; case ICE_SW_TUN_AND_NON_TUN: default: prof_type = ICE_PROF_ALL; @@ -5556,6 +5635,9 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_SW_TUN_VXLAN: match |= ICE_PKT_TUN_UDP; break; + case ICE_SW_TUN_PFCP: + match |= ICE_PKT_PFCP; + break; default: break; } @@ -5696,6 +5778,9 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_GTP: len = sizeof(struct ice_udp_gtp_hdr); break; + case ICE_PFCP: + len = sizeof(struct ice_pfcp_hdr); + break; case ICE_PPPOE: len = sizeof(struct ice_pppoe_hdr); break; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 89e6b18975ef..322ef81d3913 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -22,6 +22,8 @@ #define ICE_PROFID_IPV6_GTPC_NO_TEID 45 #define ICE_PROFID_IPV6_GTPU_TEID 46 #define ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER 70 +#define ICE_PROFID_IPV4_PFCP_NODE 79 +#define ICE_PROFID_IPV6_PFCP_SESSION 82 #define ICE_SW_RULE_VSI_LIST_SIZE(s, n) struct_size((s), vsi, (n)) #define ICE_SW_RULE_RX_TX_HDR_SIZE(s, l) struct_size((s), hdr_data, (l)) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 80797db9f2b9..2f2fce285ecd 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -38,6 +38,9 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, if (flags & ICE_TC_FLWR_FIELD_GTP_OPTS) lkups_cnt++; + if (flags & ICE_TC_FLWR_FIELD_PFCP_OPTS) + lkups_cnt++; + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | @@ -138,6 +141,8 @@ ice_proto_type_from_tunnel(enum ice_tunnel_type type) return ICE_GTP; case TNL_GTPC: return ICE_GTP_NO_PAY; + case TNL_PFCP: + return ICE_PFCP; default: return 0; } @@ -157,6 +162,8 @@ ice_sw_type_from_tunnel(enum ice_tunnel_type type) return ICE_SW_TUN_GTPU; case TNL_GTPC: return ICE_SW_TUN_GTPC; + case TNL_PFCP: + return ICE_SW_TUN_PFCP; default: return ICE_NON_TUN; } @@ -236,6 +243,22 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } + if (flags & ICE_TC_FLWR_FIELD_PFCP_OPTS) { + struct ice_pfcp_hdr *hdr_h, *hdr_m; + + hdr_h = &list[i].h_u.pfcp_hdr; + hdr_m = &list[i].m_u.pfcp_hdr; + list[i].type = ICE_PFCP; + + hdr_h->flags = fltr->pfcp_meta_keys.type; + hdr_m->flags = fltr->pfcp_meta_masks.type & 0x01; + + hdr_h->seid = fltr->pfcp_meta_keys.seid; + hdr_m->seid = fltr->pfcp_meta_masks.seid; + + i++; + } + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) { list[i].type = ice_proto_type_from_ipv4(false); @@ -366,8 +389,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, if (tc_fltr->tunnel_type != TNL_LAST) { i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i); - headers = &tc_fltr->inner_headers; - inner = true; + /* PFCP is considered non-tunneled - don't swap headers. 
*/ + if (tc_fltr->tunnel_type != TNL_PFCP) { + headers = &tc_fltr->inner_headers; + inner = true; + } } if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) { @@ -621,6 +647,8 @@ static int ice_tc_tun_get_type(struct net_device *tunnel_dev) */ if (netif_is_gtp(tunnel_dev)) return TNL_GTPU; + if (netif_is_pfcp(tunnel_dev)) + return TNL_PFCP; return TNL_LAST; } @@ -1415,6 +1443,20 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, fltr->flags |= ICE_TC_FLWR_FIELD_GTP_OPTS; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS) && + fltr->tunnel_type == TNL_PFCP) { + struct flow_match_enc_opts match; + + flow_rule_match_enc_opts(rule, &match); + + memcpy(&fltr->pfcp_meta_keys, match.key->data, + sizeof(struct pfcp_metadata)); + memcpy(&fltr->pfcp_meta_masks, match.mask->data, + sizeof(struct pfcp_metadata)); + + fltr->flags |= ICE_TC_FLWR_FIELD_PFCP_OPTS; + } + return 0; } @@ -1473,10 +1515,14 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, return err; } - /* header pointers should point to the inner headers, outer - * header were already set by ice_parse_tunnel_attr - */ - headers = &fltr->inner_headers; + /* PFCP is considered non-tunneled - don't swap headers. */ + if (fltr->tunnel_type != TNL_PFCP) { + /* Header pointers should point to the inner headers, + * outer header were already set by + * ice_parse_tunnel_attr(). + */ + headers = &fltr->inner_headers; + } } else if (dissector->used_keys & (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 5d188ad7517a..d84f153517ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -4,6 +4,9 @@ #ifndef _ICE_TC_LIB_H_ #define _ICE_TC_LIB_H_ +#include +#include + #define ICE_TC_FLWR_FIELD_DST_MAC BIT(0) #define ICE_TC_FLWR_FIELD_SRC_MAC BIT(1) #define ICE_TC_FLWR_FIELD_VLAN BIT(2) @@ -34,6 +37,7 @@ #define ICE_TC_FLWR_FIELD_VLAN_PRIO BIT(27) #define ICE_TC_FLWR_FIELD_CVLAN_PRIO BIT(28) #define ICE_TC_FLWR_FIELD_VLAN_TPID BIT(29) +#define ICE_TC_FLWR_FIELD_PFCP_OPTS BIT(30) #define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF @@ -161,6 +165,8 @@ struct ice_tc_flower_fltr { __be32 tenant_id; struct gtp_pdu_session_info gtp_pdu_info_keys; struct gtp_pdu_session_info gtp_pdu_info_masks; + struct pfcp_metadata pfcp_meta_keys; + struct pfcp_metadata pfcp_meta_masks; u32 flags; u8 tunnel_type; struct ice_tc_flower_action action; -- cgit v1.2.3 From 0e2bddf9e5f926ce32ed635012d0f8a0b54075d5 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 26 Mar 2024 00:20:37 +0100 Subject: ice: add ice_adapter for shared data across PFs on the same NIC There is a need for synchronization between ice PFs on the same physical adapter. Add a "struct ice_adapter" for holding data shared between PFs of the same multifunction PCI device. The struct is refcounted - each ice_pf holds a reference to it. Its first use will be for PTP. I expect it will be useful also to improve the ugliness that is ice_prot_id_tbl. 
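As an illustration of the sharing rule implemented in ice_adapter_index() below: PFs of the same physical device differ only in the PCI function number, which is deliberately not encoded in the xarray index, so they resolve to the same ice_adapter. A sketch with made-up addresses:

| /* sketch: 0000:3b:00.0 and 0000:3b:00.1 yield the same index, since only
|  * domain, bus and slot are encoded and PCI_SLOT() masks off the function
|  * (PCI_SLOT(0x00) == PCI_SLOT(0x01) == 0)
|  */
| index = FIELD_PREP(INDEX_FIELD_DOMAIN, 0x0000) |	/* domain 0000 */
| 	FIELD_PREP(INDEX_FIELD_BUS, 0x3b) |		/* bus 3b      */
| 	FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(0x00));	/* slot 00     */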
Reviewed-by: Przemek Kitszel Signed-off-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/ice.h | 2 + drivers/net/ethernet/intel/ice/ice_adapter.c | 114 +++++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_adapter.h | 22 ++++++ drivers/net/ethernet/intel/ice/ice_main.c | 8 ++ 5 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index cddd82d4ca0f..4fa09c321440 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -36,7 +36,8 @@ ice-y := ice_main.o \ ice_repr.o \ ice_tc_lib.o \ ice_fwlog.o \ - ice_debugfs.o + ice_debugfs.o \ + ice_adapter.o ice-$(CONFIG_PCI_IOV) += \ ice_sriov.o \ ice_virtchnl.o \ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index c4127d5f2be3..a7e88d797d4c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -77,6 +77,7 @@ #include "ice_gnss.h" #include "ice_irq.h" #include "ice_dpll.h" +#include "ice_adapter.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -537,6 +538,7 @@ struct ice_agg_node { struct ice_pf { struct pci_dev *pdev; + struct ice_adapter *adapter; struct devlink_region *nvm_region; struct devlink_region *sram_region; diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c new file mode 100644 index 000000000000..f00ab998e853 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright Red Hat + +#include +#include +#include +#include +#include +#include +#include "ice_adapter.h" + +static DEFINE_XARRAY(ice_adapters); + +/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ +#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) +#define INDEX_FIELD_BUS GENMASK(12, 5) +#define INDEX_FIELD_SLOT GENMASK(4, 0) + +static unsigned long ice_adapter_index(const struct pci_dev *pdev) +{ + unsigned int domain = pci_domain_nr(pdev->bus); + + WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); + + return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | + FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | + FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); +} + +static struct ice_adapter *ice_adapter_new(void) +{ + struct ice_adapter *adapter; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return NULL; + + refcount_set(&adapter->refcount, 1); + + return adapter; +} + +static void ice_adapter_free(struct ice_adapter *adapter) +{ + kfree(adapter); +} + +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T)) + +/** + * ice_adapter_get - Get a shared ice_adapter structure. + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter. + * + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs) + * of the same multi-function PCI device share one ice_adapter structure. + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put + * to release its reference. + * + * Context: Process, may sleep. + * Return: Pointer to ice_adapter on success. + * ERR_PTR() on error. 
-ENOMEM is the only possible error. + */ +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +{ + struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL; + unsigned long index = ice_adapter_index(pdev); + + adapter = ice_adapter_new(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + xa_lock(&ice_adapters); + ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL); + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto unlock; + } + if (ret) { + refcount_inc(&ret->refcount); + goto unlock; + } + ret = no_free_ptr(adapter); +unlock: + xa_unlock(&ice_adapters); + return ret; +} + +/** + * ice_adapter_put - Release a reference to the shared ice_adapter structure. + * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter. + * + * Releases the reference to ice_adapter previously obtained with + * ice_adapter_get. + * + * Context: Any. + */ +void ice_adapter_put(const struct pci_dev *pdev) +{ + unsigned long index = ice_adapter_index(pdev); + struct ice_adapter *adapter; + + xa_lock(&ice_adapters); + adapter = xa_load(&ice_adapters, index); + if (WARN_ON(!adapter)) + goto unlock; + + if (!refcount_dec_and_test(&adapter->refcount)) + goto unlock; + + WARN_ON(__xa_erase(&ice_adapters, index) != adapter); + ice_adapter_free(adapter); +unlock: + xa_unlock(&ice_adapters); +} diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h new file mode 100644 index 000000000000..cb5a02eb24c1 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright Red Hat */ + +#ifndef _ICE_ADAPTER_H_ +#define _ICE_ADAPTER_H_ + +#include + +struct pci_dev; + +/** + * struct ice_adapter - PCI adapter resources shared across PFs + * @refcount: Reference count. struct ice_pf objects hold the references. 
+ */ +struct ice_adapter { + refcount_t refcount; +}; + +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); +void ice_adapter_put(const struct pci_dev *pdev); + +#endif /* _ICE_ADAPTER_H */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 618570f23580..d85736f700dd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5093,6 +5093,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + struct ice_adapter *adapter; struct ice_pf *pf; struct ice_hw *hw; int err; @@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pci_set_master(pdev); + adapter = ice_adapter_get(pdev); + if (IS_ERR(adapter)) + return PTR_ERR(adapter); + pf->pdev = pdev; + pf->adapter = adapter; pci_set_drvdata(pdev, pf); set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ @@ -5196,6 +5202,7 @@ err_init_devlink: err_load: ice_deinit(pf); err_init: + ice_adapter_put(pdev); pci_disable_device(pdev); return err; } @@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev) ice_setup_mc_magic_wake(pf); ice_set_wake(pf); + ice_adapter_put(pdev); pci_disable_device(pdev); } -- cgit v1.2.3 From d29a8134c78232213fb88f20d7ae865ec364e367 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 26 Mar 2024 00:20:38 +0100 Subject: ice: avoid the PTP hardware semaphore in gettimex64 path The PTP hardware semaphore (PFTSYN_SEM) is used to synchronize operations that program the PTP timers. The operations involve issuing commands to the sideband queue. The E810 does not have a hardware sideband queue, so the admin queue is used. The admin queue is slow. I have observed delays in hundreds of milliseconds waiting for ice_sq_done. When phc2sys reads the time from the ice PTP clock and PFTSYN_SEM is held by a task performing one of the slow operations, ice_ptp_lock can easily time out. phc2sys gets -EBUSY and the kernel prints: ice 0000:XX:YY.0: PTP failed to get time These messages appear once every few seconds, causing log spam. The E810 datasheet recommends an algorithm for reading the upper 64 bits of the GLTSYN_TIME register. It matches what's implemented in ice_ptp_read_src_clk_reg. It is robust against wrap-around, but not necessarily against the concurrent setting of the register (with GLTSYN_CMD_{INIT,ADJ}_TIME commands). Perhaps that's why ice_ptp_gettimex64 also takes PFTSYN_SEM. The race with time setters can be prevented without relying on the PTP hardware semaphore. Using the "ice_adapter" from the previous patch, we can have a common spinlock for the PFs that share the clock hardware. It will protect the reading and writing to the GLTSYN_TIME register. The writing is performed indirectly, by the hardware, as a result of the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't sure if the ice_flush there is enough to make sure GLTSYN_TIME has been updated, but it works well in my testing. My test code can be seen here: https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock-10 It consists of: - kernel threads reading the time in a busy loop and looking at the deltas between consecutive values, reporting new maxima. - a shell script that sets the time repeatedly; - a bpftrace probe to produce a histogram of the measured deltas. Without the spinlock ptp_gltsyn_time_lock, it is easy to see tearing. 
Deltas in the [2G, 4G) range appear in the histograms. With the spinlock added, there is no tearing and the biggest delta I saw was in the range [1M, 2M), that is under 2 ms. Reviewed-by: Jacob Keller Reviewed-by: Przemek Kitszel Signed-off-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adapter.c | 2 ++ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++++++ drivers/net/ethernet/intel/ice/ice_ptp.c | 8 +------- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 3 +++ 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index f00ab998e853..52d15ef7f4b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "ice_adapter.h" @@ -35,6 +36,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); return adapter; diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index cb5a02eb24c1..9d11014ec02f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -4,15 +4,21 @@ #ifndef _ICE_ADAPTER_H_ #define _ICE_ADAPTER_H_ +#include #include struct pci_dev; /** * struct ice_adapter - PCI adapter resources shared across PFs + * @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME + * register of the PTP clock. * @refcount: Reference count. struct ice_pf objects hold the references. 
*/ struct ice_adapter { + /* For access to the GLTSYN_TIME register */ + spinlock_t ptp_gltsyn_time_lock; + refcount_t refcount; }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index c11eba07283c..0875f37add78 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -374,6 +374,7 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) u8 tmr_idx; tmr_idx = ice_get_ptp_src_clock_index(hw); + guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); /* Read the system timestamp pre PHC read */ ptp_read_system_prets(sts); @@ -1925,15 +1926,8 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts, struct ptp_system_timestamp *sts) { struct ice_pf *pf = ptp_info_to_pf(info); - struct ice_hw *hw = &pf->hw; - - if (!ice_ptp_lock(hw)) { - dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n"); - return -EBUSY; - } ice_ptp_read_time(pf, ts, sts); - ice_ptp_unlock(hw); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 187ce9b54e1a..2b9423a173bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -274,6 +274,9 @@ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) */ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + + guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); ice_flush(hw); } -- cgit v1.2.3 From 22118810fc7cc98f3afb38919348060ab67ddc5b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 26 Mar 2024 00:20:39 +0100 Subject: ice: fold ice_ptp_read_time into ice_ptp_gettimex64 This is a cleanup. It is unnecessary to have this function just to call another function. Reviewed-by: Przemek Kitszel Signed-off-by: Michal Schmidt Reviewed-by: Sai Krishna Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Kalesh AP Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0875f37add78..0f17fc1181d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1166,26 +1166,6 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx); } -/** - * ice_ptp_read_time - Read the time from the device - * @pf: Board private structure - * @ts: timespec structure to hold the current time value - * @sts: Optional parameter for holding a pair of system timestamps from - * the system clock. Will be ignored if NULL is given. - * - * This function reads the source clock registers and stores them in a timespec. - * However, since the registers are 64 bits of nanoseconds, we must convert the - * result to a timespec before we can return. 
- */ -static void -ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts, - struct ptp_system_timestamp *sts) -{ - u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts); - - *ts = ns_to_timespec64(time_ns); -} - /** * ice_ptp_write_init - Set PHC time to provided value * @pf: Board private structure @@ -1926,9 +1906,10 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts, struct ptp_system_timestamp *sts) { struct ice_pf *pf = ptp_info_to_pf(info); + u64 time_ns; - ice_ptp_read_time(pf, ts, sts); - + time_ns = ice_ptp_read_src_clk_reg(pf, sts); + *ts = ns_to_timespec64(time_ns); return 0; } -- cgit v1.2.3 From 95ad92d687e78c90e720174ffdf7a728add95b9e Mon Sep 17 00:00:00 2001 From: Steven Zou Date: Thu, 8 Feb 2024 11:18:37 +0800 Subject: ice: Add switch recipe reusing feature New E810 firmware supports the corresponding functionality, so the driver allows PFs to subscribe the same switch recipes. Then when the PF is done with a switch recipes, the PF can ask firmware to free that switch recipe. When users configure a rule to PFn into E810 switch component, if there is no existing recipe matching this rule's pattern, the driver will request firmware to allocate and return a new recipe resource for the rule by calling ice_add_sw_recipe() and ice_alloc_recipe(). If there is an existing recipe matching this rule's pattern with different key value, or this is a same second rule to PFm into switch component, the driver checks out this recipe by calling ice_find_recp(), the driver will tell firmware to share using this same recipe resource by calling ice_subscribable_recp_shared() and ice_subscribe_recipe(). When firmware detects that all subscribing PFs have freed the switch recipe, firmware will free the switch recipe so that it can be reused. This feature also fixes a problem where all switch recipes would eventually be exhausted because switch recipes could not be freed, as freeing a shared recipe could potentially break other PFs that were using it. 
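The difference between the first allocation of a recipe and a later subscription to it comes down to the resource-type flags carried in the allocate-resources admin queue command. A simplified sketch (flag names from the ice_adminq_cmd.h hunk below; this is not the complete ice_alloc_recipe()/ice_subscribe_recipe() code):

| /* first PF using the pattern: allocate a new recipe others may join */
| res_type = FIELD_PREP(ICE_AQC_RES_TYPE_M, ICE_AQC_RES_TYPE_RECIPE) |
| 	   ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_SHARED;
|
| /* later PFs matching the same pattern: subscribe to the existing ID */
| res_type = FIELD_PREP(ICE_AQC_RES_TYPE_M, ICE_AQC_RES_TYPE_RECIPE) |
| 	   ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_SHARED |
| 	   ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_CTL;
| sw_buf->elem[0].e.sw_resp = cpu_to_le16(rid);	/* recipe ID to subscribe to */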
Reviewed-by: Przemek Kitszel Reviewed-by: Andrii Staikov Reviewed-by: Simon Horman Signed-off-by: Steven Zou Tested-by: Mayank Sharma Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 2 + drivers/net/ethernet/intel/ice/ice_common.c | 2 + drivers/net/ethernet/intel/ice/ice_switch.c | 187 +++++++++++++++++++++--- drivers/net/ethernet/intel/ice/ice_switch.h | 1 + drivers/net/ethernet/intel/ice/ice_type.h | 2 + 5 files changed, 177 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1f3e7a6903e5..540c0bdca936 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -264,6 +264,8 @@ struct ice_aqc_set_port_params { #define ICE_AQC_RES_TYPE_FLAG_SHARED BIT(7) #define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12) #define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13) +#define ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_SHARED BIT(14) +#define ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_CTL BIT(15) #define ICE_AQC_RES_TYPE_FLAG_DEDICATED 0x00 diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index db4b2844e1f7..dc5a1ef3364d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1142,6 +1142,8 @@ int ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; mutex_init(&hw->tnl_lock); + ice_init_chk_recipe_reuse_support(hw); + return 0; err_unroll_fltr_mgmt_struct: diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 606c2419182a..94d6670d0901 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2148,6 +2148,18 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, return status; } +/** + * ice_init_chk_recipe_reuse_support - check if recipe reuse is supported + * @hw: pointer to the hardware structure + */ +void ice_init_chk_recipe_reuse_support(struct ice_hw *hw) +{ + struct ice_nvm_info *nvm = &hw->flash.nvm; + + hw->recp_reuse = (nvm->major == 0x4 && nvm->minor >= 0x30) || + nvm->major > 0x4; +} + /** * ice_alloc_recipe - add recipe resource * @hw: pointer to the hardware structure @@ -2157,12 +2169,16 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) { DEFINE_RAW_FLEX(struct ice_aqc_alloc_free_res_elem, sw_buf, elem, 1); u16 buf_len = __struct_size(sw_buf); + u16 res_type; int status; sw_buf->num_elems = cpu_to_le16(1); - sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE << - ICE_AQC_RES_TYPE_S) | - ICE_AQC_RES_TYPE_FLAG_SHARED); + res_type = FIELD_PREP(ICE_AQC_RES_TYPE_M, ICE_AQC_RES_TYPE_RECIPE); + if (hw->recp_reuse) + res_type |= ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_SHARED; + else + res_type |= ICE_AQC_RES_TYPE_FLAG_SHARED; + sw_buf->res_type = cpu_to_le16(res_type); status = ice_aq_alloc_free_res(hw, sw_buf, buf_len, ice_aqc_opc_alloc_res); if (!status) @@ -2171,6 +2187,70 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) return status; } +/** + * ice_free_recipe_res - free recipe resource + * @hw: pointer to the hardware structure + * @rid: recipe ID to free + * + * Return: 0 on success, and others on error + */ +static int ice_free_recipe_res(struct ice_hw *hw, u16 rid) +{ + return ice_free_hw_res(hw, ICE_AQC_RES_TYPE_RECIPE, 1, &rid); +} + +/** + * ice_release_recipe_res - disassociate and free recipe resource + * @hw: pointer 
to the hardware structure + * @recp: the recipe struct resource to unassociate and free + * + * Return: 0 on success, and others on error + */ +static int ice_release_recipe_res(struct ice_hw *hw, + struct ice_sw_recipe *recp) +{ + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + struct ice_switch_info *sw = hw->switch_info; + u64 recp_assoc; + u32 rid, prof; + int status; + + for_each_set_bit(rid, recp->r_bitmap, ICE_MAX_NUM_RECIPES) { + for_each_set_bit(prof, recipe_to_profile[rid], + ICE_MAX_NUM_PROFILES) { + status = ice_aq_get_recipe_to_profile(hw, prof, + &recp_assoc, + NULL); + if (status) + return status; + + bitmap_from_arr64(r_bitmap, &recp_assoc, + ICE_MAX_NUM_RECIPES); + bitmap_andnot(r_bitmap, r_bitmap, recp->r_bitmap, + ICE_MAX_NUM_RECIPES); + bitmap_to_arr64(&recp_assoc, r_bitmap, + ICE_MAX_NUM_RECIPES); + ice_aq_map_recipe_to_profile(hw, prof, + recp_assoc, NULL); + + clear_bit(rid, profile_to_recipe[prof]); + clear_bit(prof, recipe_to_profile[rid]); + } + + status = ice_free_recipe_res(hw, rid); + if (status) + return status; + + sw->recp_list[rid].recp_created = false; + sw->recp_list[rid].adv_rule = false; + memset(&sw->recp_list[rid].lkup_exts, 0, + sizeof(sw->recp_list[rid].lkup_exts)); + clear_bit(rid, recp->r_bitmap); + } + + return 0; +} + /** * ice_get_recp_to_prof_map - updates recipe to profile mapping * @hw: pointer to hardware structure @@ -2220,6 +2300,7 @@ ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf, * @recps: struct that we need to populate * @rid: recipe ID that we are populating * @refresh_required: true if we should get recipe to profile mapping from FW + * @is_add: flag of adding recipe * * This function is used to populate all the necessary entries into our * bookkeeping so that we have a current list of all the recipes that are @@ -2227,7 +2308,7 @@ ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf, */ static int ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, - bool *refresh_required) + bool *refresh_required, bool is_add) { DECLARE_BITMAP(result_bm, ICE_MAX_FV_WORDS); struct ice_aqc_recipe_data_elem *tmp; @@ -2344,8 +2425,12 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, recps[idx].chain_idx = ICE_INVAL_CHAIN_IND; } - if (!is_root) + if (!is_root) { + if (hw->recp_reuse && is_add) + recps[idx].recp_created = true; + continue; + } /* Only do the following for root recipes entries */ memcpy(recps[idx].r_bitmap, root_bufs.recipe_bitmap, @@ -2369,7 +2454,8 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, /* Copy result indexes */ bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS); - recps[rid].recp_created = true; + if (is_add) + recps[rid].recp_created = true; err_unroll: kfree(tmp); @@ -4653,12 +4739,13 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { * @hw: pointer to the hardware structure * @lkup_exts: extension sequence to match * @rinfo: information regarding the rule e.g. priority and action info + * @is_add: flag of adding recipe * * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found. 
*/ static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, - const struct ice_adv_rule_info *rinfo) + const struct ice_adv_rule_info *rinfo, bool is_add) { bool refresh_required = true; struct ice_sw_recipe *recp; @@ -4672,11 +4759,12 @@ ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, * entry update it in our SW bookkeeping and continue with the * matching. */ - if (!recp[i].recp_created) + if (hw->recp_reuse) { if (ice_get_recp_frm_fw(hw, hw->switch_info->recp_list, i, - &refresh_required)) + &refresh_required, is_add)) continue; + } /* Skip inverse action recipes */ if (recp[i].root_buf && recp[i].root_buf->content.act_ctrl & @@ -5360,6 +5448,49 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, ice_get_sw_fv_bitmap(hw, prof_type, bm); } +/** + * ice_subscribe_recipe - subscribe to an existing recipe + * @hw: pointer to the hardware structure + * @rid: recipe ID to subscribe to + * + * Return: 0 on success, and others on error + */ +static int ice_subscribe_recipe(struct ice_hw *hw, u16 rid) +{ + DEFINE_RAW_FLEX(struct ice_aqc_alloc_free_res_elem, sw_buf, elem, 1); + u16 buf_len = __struct_size(sw_buf); + u16 res_type; + int status; + + /* Prepare buffer to allocate resource */ + sw_buf->num_elems = cpu_to_le16(1); + res_type = FIELD_PREP(ICE_AQC_RES_TYPE_M, ICE_AQC_RES_TYPE_RECIPE) | + ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_SHARED | + ICE_AQC_RES_TYPE_FLAG_SUBSCRIBE_CTL; + sw_buf->res_type = cpu_to_le16(res_type); + + sw_buf->elem[0].e.sw_resp = cpu_to_le16(rid); + + status = ice_aq_alloc_free_res(hw, sw_buf, buf_len, + ice_aqc_opc_alloc_res); + + return status; +} + +/** + * ice_subscribable_recp_shared - share an existing subscribable recipe + * @hw: pointer to the hardware structure + * @rid: recipe ID to subscribe to + */ +static void ice_subscribable_recp_shared(struct ice_hw *hw, u16 rid) +{ + struct ice_sw_recipe *recps = hw->switch_info->recp_list; + u16 sub_rid; + + for_each_set_bit(sub_rid, recps[rid].r_bitmap, ICE_MAX_NUM_RECIPES) + ice_subscribe_recipe(hw, sub_rid); +} + /** * ice_add_adv_recipe - Add an advanced recipe that is not part of the default * @hw: pointer to hardware structure @@ -5382,6 +5513,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, struct ice_sw_fv_list_entry *tmp; struct ice_sw_recipe *rm; int status = 0; + u16 rid_tmp; u8 i; if (!lkups_cnt) @@ -5459,10 +5591,14 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } /* Look for a recipe which matches our requested fv / mask list */ - *rid = ice_find_recp(hw, lkup_exts, rinfo); - if (*rid < ICE_MAX_NUM_RECIPES) + *rid = ice_find_recp(hw, lkup_exts, rinfo, true); + if (*rid < ICE_MAX_NUM_RECIPES) { /* Success if found a recipe that match the existing criteria */ + if (hw->recp_reuse) + ice_subscribable_recp_shared(hw, *rid); + goto err_unroll; + } rm->tun_type = rinfo->tun_type; /* Recipe we need does not exist, add a recipe */ @@ -5481,14 +5617,14 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, &recp_assoc, NULL); if (status) - goto err_unroll; + goto err_free_recipe; bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_acquire_change_lock(hw, ICE_RES_WRITE); if (status) - goto err_unroll; + goto err_free_recipe; bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); status = ice_aq_map_recipe_to_profile(hw, 
fvit->profile_id, @@ -5496,7 +5632,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, ice_release_change_lock(hw); if (status) - goto err_unroll; + goto err_free_recipe; /* Update profile to recipe bitmap array */ bitmap_copy(profile_to_recipe[fvit->profile_id], r_bitmap, @@ -5510,6 +5646,16 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, *rid = rm->root_rid; memcpy(&hw->switch_info->recp_list[*rid].lkup_exts, lkup_exts, sizeof(*lkup_exts)); + goto err_unroll; + +err_free_recipe: + if (hw->recp_reuse) { + for_each_set_bit(rid_tmp, rm->r_bitmap, ICE_MAX_NUM_RECIPES) { + if (!ice_free_recipe_res(hw, rid_tmp)) + clear_bit(rid_tmp, rm->r_bitmap); + } + } + err_unroll: list_for_each_entry_safe(r_entry, r_tmp, &rm->rg_list, l_entry) { list_del(&r_entry->l_entry); @@ -6529,7 +6675,7 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, return -EIO; } - rid = ice_find_recp(hw, &lkup_exts, rinfo); + rid = ice_find_recp(hw, &lkup_exts, rinfo, false); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) return -EINVAL; @@ -6573,14 +6719,21 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, ice_aqc_opc_remove_sw_rules, NULL); if (!status || status == -ENOENT) { struct ice_switch_info *sw = hw->switch_info; + struct ice_sw_recipe *r_list = sw->recp_list; mutex_lock(rule_lock); list_del(&list_elem->list_entry); devm_kfree(ice_hw_to_dev(hw), list_elem->lkups); devm_kfree(ice_hw_to_dev(hw), list_elem); mutex_unlock(rule_lock); - if (list_empty(&sw->recp_list[rid].filt_rules)) - sw->recp_list[rid].adv_rule = false; + if (list_empty(&r_list[rid].filt_rules)) { + r_list[rid].adv_rule = false; + + /* All rules for this recipe are now removed */ + if (hw->recp_reuse) + ice_release_recipe_res(hw, + &r_list[rid]); + } } kfree(s_rule); } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 322ef81d3913..ad98e98c812d 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -432,5 +432,6 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, int ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, struct ice_sq_cd *cd); +void ice_init_chk_recipe_reuse_support(struct ice_hw *hw); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 64b8e7ec0b63..08ec5efdafe6 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -848,6 +848,8 @@ struct ice_hw { u16 max_burst_size; /* driver sets this value */ + u8 recp_reuse:1; /* indicates whether FW supports recipe reuse */ + /* Tx Scheduler values */ u8 num_tx_sched_layers; u8 num_tx_sched_phys_layers; -- cgit v1.2.3 From e6893962ef0ee15501a860615da8798c877961cd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 21 Mar 2024 09:58:17 -0700 Subject: ice: Remove newlines in NL_SET_ERR_MSG_MOD Fixes Coccinelle/coccicheck warnings reported by newline_in_nl_msg.cocci. 
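The strings given to NL_SET_ERR_MSG_MOD() are delivered to userspace via the netlink extended ack and printed by tools such as devlink, which add their own line termination, so an embedded "\n" only produces stray blank output. The fix is simply to drop the escape, e.g.:

| NL_SET_ERR_MSG_MOD(extack, "Turn off ADQ before doing reinit");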
Signed-off-by: Thorsten Blum Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index b516e42b41f0..3c3616f0f811 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -478,17 +478,17 @@ ice_devlink_reload_down(struct devlink *devlink, bool netns_change, case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: if (ice_is_eswitch_mode_switchdev(pf)) { NL_SET_ERR_MSG_MOD(extack, - "Go to legacy mode before doing reinit\n"); + "Go to legacy mode before doing reinit"); return -EOPNOTSUPP; } if (ice_is_adq_active(pf)) { NL_SET_ERR_MSG_MOD(extack, - "Turn off ADQ before doing reinit\n"); + "Turn off ADQ before doing reinit"); return -EOPNOTSUPP; } if (ice_has_vfs(pf)) { NL_SET_ERR_MSG_MOD(extack, - "Remove all VFs before doing reinit\n"); + "Remove all VFs before doing reinit"); return -EOPNOTSUPP; } ice_devlink_reinit_down(pf); -- cgit v1.2.3 From 0545cc86767e044b0c7bbcbbf59698a2ad78e0c5 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 25 Mar 2024 22:34:31 +0100 Subject: ice: move ice_devlink.[ch] to devlink folder Only moving whole files, fixing Makefile and bunch of includes. Some changes to ice_devlink file was done even in representor part (Tx topology), so keep it as final patch to not mess up with rebasing. After moving to devlink folder there is no need to have such long name for these files. Rename them to simple devlink. Reviewed-by: Aleksandr Loktionov Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/devlink/devlink.c | 2022 ++++++++++++++++++++++ drivers/net/ethernet/intel/ice/devlink/devlink.h | 25 + drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_devlink.c | 2022 ---------------------- drivers/net/ethernet/intel/ice/ice_devlink.h | 25 - drivers/net/ethernet/intel/ice/ice_eswitch.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_repr.c | 2 +- 10 files changed, 2054 insertions(+), 2053 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/devlink/devlink.c create mode 100644 drivers/net/ethernet/intel/ice/devlink/devlink.h delete mode 100644 drivers/net/ethernet/intel/ice/ice_devlink.c delete mode 100644 drivers/net/ethernet/intel/ice/ice_devlink.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 4fa09c321440..736b1cae2033 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -5,6 +5,7 @@ # Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver # +subdir-ccflags-y += -I$(src) obj-$(CONFIG_ICE) += ice.o ice-y := ice_main.o \ @@ -28,7 +29,7 @@ ice-y := ice_main.o \ ice_flex_pipe.o \ ice_flow.o \ ice_idc.o \ - ice_devlink.o \ + devlink/devlink.o \ ice_ddp.o \ ice_fw_update.o \ ice_lag.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c new file mode 100644 index 000000000000..82983eab52e0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -0,0 +1,2022 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. */ + +#include + +#include "ice.h" +#include "ice_lib.h" +#include "devlink.h" +#include "ice_eswitch.h" +#include "ice_fw_update.h" +#include "ice_dcb_lib.h" + +static int ice_active_port_option = -1; + +/* context for devlink info version reporting */ +struct ice_info_ctx { + char buf[128]; + struct ice_orom_info pending_orom; + struct ice_nvm_info pending_nvm; + struct ice_netlist_info pending_netlist; + struct ice_hw_dev_caps dev_caps; +}; + +/* The following functions are used to format specific strings for various + * devlink info versions. The ctx parameter is used to provide the storage + * buffer, as well as any ancillary information calculated when the info + * request was made. + * + * If a version does not exist, for example when attempting to get the + * inactive version of flash when there is no pending update, the function + * should leave the buffer in the ctx structure empty. + */ + +static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + u8 dsn[8]; + + /* Copy the DSN into an array in Big Endian format */ + put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); + + snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); +} + +static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + int status; + + status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf)); + if (status) + /* We failed to locate the PBA, so just skip this entry */ + dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n", + status); +} + +static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch); +} + +static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, + hw->api_min_ver, hw->api_patch); +} + +static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build); +} + +static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_orom_info *orom = &pf->hw.flash.orom; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); +} + +static void +ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_orom_info *orom = &ctx->pending_orom; + + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); +} + +static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); +} + +static void +ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", + nvm->major, nvm->minor); +} + +static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); +} + +static void +ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx 
*ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); +} + +static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_hw *hw = &pf->hw; + + snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name); +} + +static void +ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; + + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", + pkg->major, pkg->minor, pkg->update, pkg->draft); +} + +static void +ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id); +} + +static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; + + /* The netlist version fields are BCD formatted */ + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); +} + +static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; + + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); +} + +static void +ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + /* The netlist version fields are BCD formatted */ + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); +} + +static void +ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); +} + +static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + u32 id, cfg_ver, fw_ver; + + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver); +} + +static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number); +} + +#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } +#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } +#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } + +/* The combined() macro inserts both the running entry as well as a stored + * entry. The running entry will always report the version from the active + * handler. The stored entry will first try the pending handler, and fallback + * to the active handler if the pending function does not report a version. + * The pending handler should check the status of a pending update for the + * relevant flash component. It should only fill in the buffer in the case + * where a valid pending version is available. 
This ensures that the related + * stored and running versions remain in sync, and that stored versions are + * correctly reported as expected. + */ +#define combined(key, active, pending) \ + running(key, active), \ + stored(key, pending, active) + +enum ice_version_type { + ICE_VERSION_FIXED, + ICE_VERSION_RUNNING, + ICE_VERSION_STORED, +}; + +static const struct ice_devlink_version { + enum ice_version_type type; + const char *key; + void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); + void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); +} ice_devlink_versions[] = { + fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), + running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), + running("fw.mgmt.api", ice_info_fw_api), + running("fw.mgmt.build", ice_info_fw_build), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver), + combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack), + running("fw.app.name", ice_info_ddp_pkg_name), + running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), + running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), + combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver), + combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build), + fixed("cgu.id", ice_info_cgu_id), + running("fw.cgu", ice_info_cgu_fw_build), +}; + +/** + * ice_devlink_info_get - .info_get devlink handler + * @devlink: devlink instance structure + * @req: the devlink info request + * @extack: extended netdev ack structure + * + * Callback for the devlink .info_get operation. Reports information about the + * device. + * + * Return: zero on success or an error code on failure. 
+ */ +static int ice_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + struct ice_info_ctx *ctx; + size_t i; + int err; + + err = ice_wait_for_reset(pf, 10 * HZ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting"); + return err; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + /* discover capabilities first */ + err = ice_discover_dev_caps(hw, &ctx->dev_caps); + if (err) { + dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities"); + goto out_free_ctx; + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) { + err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom); + if (err) { + dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_orom = false; + } + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) { + err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm); + if (err) { + dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_nvm = false; + } + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) { + err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist); + if (err) { + dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_netlist = false; + } + } + + ice_info_get_dsn(pf, ctx); + + err = devlink_info_serial_number_put(req, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number"); + goto out_free_ctx; + } + + for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) { + enum ice_version_type type = ice_devlink_versions[i].type; + const char *key = ice_devlink_versions[i].key; + + memset(ctx->buf, 0, sizeof(ctx->buf)); + + ice_devlink_versions[i].getter(pf, ctx); + + /* If the default getter doesn't report a version, use the + * fallback function. This is primarily useful in the case of + * "stored" versions that want to report the same value as the + * running version in the normal case of no pending update. 
+ */ + if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) + ice_devlink_versions[i].fallback(pf, ctx); + + /* Do not report missing versions */ + if (ctx->buf[0] == '\0') + continue; + + switch (type) { + case ICE_VERSION_FIXED: + err = devlink_info_version_fixed_put(req, key, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version"); + goto out_free_ctx; + } + break; + case ICE_VERSION_RUNNING: + err = devlink_info_version_running_put(req, key, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set running version"); + goto out_free_ctx; + } + break; + case ICE_VERSION_STORED: + err = devlink_info_version_stored_put(req, key, ctx->buf); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version"); + goto out_free_ctx; + } + break; + } + } + +out_free_ctx: + kfree(ctx); + return err; +} + +/** + * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware + * @pf: pointer to the pf instance + * @extack: netlink extended ACK structure + * + * Allow user to activate new Embedded Management Processor firmware by + * issuing device specific EMP reset. Called in response to + * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE. + * + * Note that teardown and rebuild of the driver state happens automatically as + * part of an interrupt and watchdog task. This is because all physical + * functions on the device must be able to reset when an EMP reset occurs from + * any source. + */ +static int +ice_devlink_reload_empr_start(struct ice_pf *pf, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + u8 pending; + int err; + + err = ice_get_pending_updates(pf, &pending, extack); + if (err) + return err; + + /* pending is a bitmask of which flash banks have a pending update, + * including the main NVM bank, the Option ROM bank, and the netlist + * bank. If any of these bits are set, then there is a pending update + * waiting to be activated. + */ + if (!pending) { + NL_SET_ERR_MSG_MOD(extack, "No pending firmware update"); + return -ECANCELED; + } + + if (pf->fw_emp_reset_disabled) { + NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. 
To activate firmware, a reboot or power cycle is needed"); + return -ECANCELED; + } + + dev_dbg(dev, "Issuing device EMP reset to activate firmware\n"); + + err = ice_aq_nvm_update_empr(hw); + if (err) { + dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware"); + return err; + } + + return 0; +} + +/** + * ice_devlink_reinit_down - unload given PF + * @pf: pointer to the PF struct + */ +static void ice_devlink_reinit_down(struct ice_pf *pf) +{ + /* No need to take devl_lock, it's already taken by devlink API */ + ice_unload(pf); + rtnl_lock(); + ice_vsi_decfg(ice_get_main_vsi(pf)); + rtnl_unlock(); + ice_deinit_dev(pf); +} + +/** + * ice_devlink_reload_down - prepare for reload + * @devlink: pointer to the devlink instance to reload + * @netns_change: if true, the network namespace is changing + * @action: the action to perform + * @limit: limits on what reload should do, such as not resetting + * @extack: netlink extended ACK structure + */ +static int +ice_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (ice_is_eswitch_mode_switchdev(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Go to legacy mode before doing reinit"); + return -EOPNOTSUPP; + } + if (ice_is_adq_active(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Turn off ADQ before doing reinit"); + return -EOPNOTSUPP; + } + if (ice_has_vfs(pf)) { + NL_SET_ERR_MSG_MOD(extack, + "Remove all VFs before doing reinit"); + return -EOPNOTSUPP; + } + ice_devlink_reinit_down(pf); + return 0; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + return ice_devlink_reload_empr_start(pf, extack); + default: + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +/** + * ice_devlink_reload_empr_finish - Wait for EMP reset to finish + * @pf: pointer to the pf instance + * @extack: netlink extended ACK structure + * + * Wait for driver to finish rebuilding after EMP reset is completed. This + * includes time to wait for both the actual device reset as well as the time + * for the driver's rebuild to complete. 
+ */ +static int +ice_devlink_reload_empr_finish(struct ice_pf *pf, + struct netlink_ext_ack *extack) +{ + int err; + + err = ice_wait_for_reset(pf, 60 * HZ); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute"); + return err; + } + + return 0; +} + +/** + * ice_devlink_port_opt_speed_str - convert speed to a string + * @speed: speed value + */ +static const char *ice_devlink_port_opt_speed_str(u8 speed) +{ + switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { + case ICE_AQC_PORT_OPT_MAX_LANE_100M: + return "0.1"; + case ICE_AQC_PORT_OPT_MAX_LANE_1G: + return "1"; + case ICE_AQC_PORT_OPT_MAX_LANE_2500M: + return "2.5"; + case ICE_AQC_PORT_OPT_MAX_LANE_5G: + return "5"; + case ICE_AQC_PORT_OPT_MAX_LANE_10G: + return "10"; + case ICE_AQC_PORT_OPT_MAX_LANE_25G: + return "25"; + case ICE_AQC_PORT_OPT_MAX_LANE_50G: + return "50"; + case ICE_AQC_PORT_OPT_MAX_LANE_100G: + return "100"; + } + + return "-"; +} + +#define ICE_PORT_OPT_DESC_LEN 50 +/** + * ice_devlink_port_options_print - Print available port split options + * @pf: the PF to print split port options + * + * Prints a table with available port split options and max port speeds + */ +static void ice_devlink_port_options_print(struct ice_pf *pf) +{ + u8 i, j, options_count, cnt, speed, pending_idx, active_idx; + struct ice_aqc_get_port_options_elem *options, *opt; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + char desc[ICE_PORT_OPT_DESC_LEN]; + const char *str; + int status; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, + sizeof(*options), GFP_KERNEL); + if (!options) + return; + + for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { + opt = options + i * ICE_AQC_PORT_OPT_MAX; + options_count = ICE_AQC_PORT_OPT_MAX; + active_valid = 0; + + status = ice_aq_get_port_options(&pf->hw, opt, &options_count, + i, true, &active_idx, + &active_valid, &pending_idx, + &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", + i, status); + goto err; + } + } + + dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); + dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); + dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); + + for (i = 0; i < options_count; i++) { + cnt = 0; + + if (i == ice_active_port_option) + str = "Active"; + else if ((i == pending_idx) && pending_valid) + str = "Pending"; + else + str = ""; + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-8s", str); + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-6u", options[i].pmd); + + for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { + speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; + str = ice_devlink_port_opt_speed_str(speed); + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%3s ", str); + } + + dev_dbg(dev, "%s\n", desc); + } + +err: + kfree(options); +} + +/** + * ice_devlink_aq_set_port_option - Send set port option admin queue command + * @pf: the PF to print split port options + * @option_idx: selected port option + * @extack: extended netdev ack structure + * + * Sends set port option admin queue command with selected port option and + * calls NVM write activate. 
+ */ +static int +ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + int status; + + status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); + if (status) { + dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); + return -EIO; + } + + status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); + if (status) { + dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); + ice_release_nvm(&pf->hw); + return -EIO; + } + + ice_release_nvm(&pf->hw); + + NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); + return 0; +} + +/** + * ice_devlink_port_split - .port_split devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @count: number of ports to split to + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_split operation. + * + * Unfortunately, the devlink expression of available options is limited + * to just a number, so search for an FW port option which supports + * the specified number. As there could be multiple FW port options with + * the same port split count, allow switching between them. When the same + * port split count request is issued again, switch to the next FW port + * option with the same port split count. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, j, active_idx, pending_idx, new_option; + struct ice_pf *pf = devlink_priv(devlink); + u8 option_count = ICE_AQC_PORT_OPT_MAX; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port split options, err = %d\n", + status); + NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); + return -EIO; + } + + new_option = ICE_AQC_PORT_OPT_MAX; + active_idx = pending_valid ? pending_idx : active_idx; + for (i = 1; i <= option_count; i++) { + /* In order to allow switching between FW port options with + * the same port split count, search for a new option starting + * from the active/pending option (with array wrap around). 
+ */ + j = (active_idx + i) % option_count; + + if (count == options[j].pmd) { + new_option = j; + break; + } + } + + if (new_option == active_idx) { + dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", + count); + NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + if (new_option == ICE_AQC_PORT_OPT_MAX) { + dev_dbg(dev, "request to split: count: %u not found\n", count); + NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + status = ice_devlink_aq_set_port_option(pf, new_option, extack); + if (status) + return status; + + ice_devlink_port_options_print(pf); + + return 0; +} + +/** + * ice_devlink_port_unsplit - .port_unsplit devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_unsplit operation. + * Calls ice_devlink_port_split with split count set to 1. + * There could be no FW option available with split count 1. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + return ice_devlink_port_split(devlink, port, 1, extack); +} + +/** + * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree + * @pf: pf struct + * + * This function tears down tree exported during VF's creation. + */ +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf) +{ + struct devlink *devlink; + struct ice_vf *vf; + unsigned int bkt; + + devlink = priv_to_devlink(pf); + + devl_lock(devlink); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + if (vf->devlink_port.devlink_rate) + devl_rate_leaf_destroy(&vf->devlink_port); + } + mutex_unlock(&pf->vfs.table_lock); + + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); +} + +/** + * ice_enable_custom_tx - try to enable custom Tx feature + * @pf: pf struct + * + * This function tries to enable custom Tx feature, + * it's not possible to enable it, if DCB or ADQ is active. + */ +static bool ice_enable_custom_tx(struct ice_pf *pf) +{ + struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info; + struct device *dev = ice_pf_to_dev(pf); + + if (pi->is_custom_tx_enabled) + /* already enabled, return true */ + return true; + + if (ice_is_adq_active(pf)) { + dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n"); + return false; + } + + if (ice_is_dcb_active(pf)) { + dev_err(dev, "DCB active, can't modify Tx scheduler tree\n"); + return false; + } + + pi->is_custom_tx_enabled = true; + + return true; +} + +/** + * ice_traverse_tx_tree - traverse Tx scheduler tree + * @devlink: devlink struct + * @node: current node, used for recursion + * @tc_node: tc_node struct, that is treated as a root + * @pf: pf struct + * + * This function traverses Tx scheduler tree and exports + * entire structure to the devlink-rate. 
+ */
+static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node,
+ struct ice_sched_node *tc_node, struct ice_pf *pf)
+{
+ struct devlink_rate *rate_node = NULL;
+ struct ice_vf *vf;
+ int i;
+
+ if (node->rate_node)
+ /* already added, skip to the next */
+ goto traverse_children;
+
+ if (node->parent == tc_node) {
+ /* create root node */
+ rate_node = devl_rate_node_create(devlink, node, node->name, NULL);
+ } else if (node->vsi_handle &&
+ pf->vsi[node->vsi_handle]->vf) {
+ vf = pf->vsi[node->vsi_handle]->vf;
+ if (!vf->devlink_port.devlink_rate)
+ /* leaf nodes don't have children,
+ * so we don't set rate_node
+ */
+ devl_rate_leaf_create(&vf->devlink_port, node,
+ node->parent->rate_node);
+ } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF &&
+ node->parent->rate_node) {
+ rate_node = devl_rate_node_create(devlink, node, node->name,
+ node->parent->rate_node);
+ }
+
+ if (rate_node && !IS_ERR(rate_node))
+ node->rate_node = rate_node;
+
+traverse_children:
+ for (i = 0; i < node->num_children; i++)
+ ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf);
+}
+
+/**
+ * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate
+ * @devlink: devlink struct
+ * @vsi: main vsi struct
+ *
+ * This function finds a root node, then calls ice_traverse_tx_tree, which
+ * traverses the tree and exports its contents to devlink rate.
+ */
+int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi)
+{
+ struct ice_port_info *pi = vsi->port_info;
+ struct ice_sched_node *tc_node;
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ tc_node = pi->root->children[0];
+ mutex_lock(&pi->sched_lock);
+ devl_lock(devlink);
+ for (i = 0; i < tc_node->num_children; i++)
+ ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf);
+ devl_unlock(devlink);
+ mutex_unlock(&pi->sched_lock);
+
+ return 0;
+}
+
+static void ice_clear_rate_nodes(struct ice_sched_node *node)
+{
+ node->rate_node = NULL;
+
+ for (int i = 0; i < node->num_children; i++)
+ ice_clear_rate_nodes(node->children[i]);
+}
+
+/**
+ * ice_devlink_rate_clear_tx_topology - clear node->rate_node
+ * @vsi: main vsi struct
+ *
+ * Clear rate_node to clean up creation of Tx topology.
+ *
+ */
+void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi)
+{
+ struct ice_port_info *pi = vsi->port_info;
+
+ mutex_lock(&pi->sched_lock);
+ ice_clear_rate_nodes(pi->root->children[0]);
+ mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_set_object_tx_share - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @bw: bandwidth in bytes per second
+ * @extack: extended netdev ack structure
+ *
+ * This function sets ICE_MIN_BW scheduling BW limit.
+ */
+static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node,
+ u64 bw, struct netlink_ext_ack *extack)
+{
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ /* converts bytes per second to kilo bits per second */
+ node->tx_share = div_u64(bw, 125);
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share);
+ mutex_unlock(&pi->sched_lock);
+
+ if (status)
+ NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share");
+
+ return status;
+}
+
+/**
+ * ice_set_object_tx_max - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @bw: bandwidth in bytes per second
+ * @extack: extended netdev ack structure
+ *
+ * This function sets ICE_MAX_BW scheduling BW limit.
+ */
+static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node,
+ u64 bw, struct netlink_ext_ack *extack)
+{
+ int status;
+
+ mutex_lock(&pi->sched_lock);
+ /* converts bytes per second value to kilo bits per second */
+ node->tx_max = div_u64(bw, 125);
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max);
+ mutex_unlock(&pi->sched_lock);
+
+ if (status)
+ NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max");
+
+ return status;
+}
+
+/**
+ * ice_set_object_tx_priority - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @priority: value representing priority for strict priority arbitration
+ * @extack: extended netdev ack structure
+ *
+ * This function sets priority of node among siblings.
+ */
+static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node,
+ u32 priority, struct netlink_ext_ack *extack)
+{
+ int status;
+
+ if (priority >= 8) {
+ NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8");
+ return -EINVAL;
+ }
+
+ mutex_lock(&pi->sched_lock);
+ node->tx_priority = priority;
+ status = ice_sched_set_node_priority(pi, node, node->tx_priority);
+ mutex_unlock(&pi->sched_lock);
+
+ if (status)
+ NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority");
+
+ return status;
+}
+
+/**
+ * ice_set_object_tx_weight - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @weight: value representing relative weight for WFQ arbitration
+ * @extack: extended netdev ack structure
+ *
+ * This function sets node weight for WFQ algorithm.
+ */
+static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node,
+ u32 weight, struct netlink_ext_ack *extack)
+{
+ int status;
+
+ if (weight > 200 || weight < 1) {
+ NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200");
+ return -EINVAL;
+ }
+
+ mutex_lock(&pi->sched_lock);
+ node->tx_weight = weight;
+ status = ice_sched_set_node_weight(pi, node, node->tx_weight);
+ mutex_unlock(&pi->sched_lock);
+
+ if (status)
+ NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight");
+
+ return status;
+}
+
+/**
+ * ice_get_pi_from_dev_rate - get port info from devlink_rate
+ * @rate_node: devlink struct instance
+ *
+ * This function returns the corresponding port_info struct of devlink_rate
+ */
+static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node)
+{
+ struct ice_pf *pf = devlink_priv(rate_node->devlink);
+
+ return ice_get_main_vsi(pf)->port_info;
+}
+
+static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_sched_node *node;
+ struct ice_port_info *pi;
+
+ pi = ice_get_pi_from_dev_rate(rate_node);
+
+ if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+ return -EBUSY;
+
+ /* preallocate memory for ice_sched_node */
+ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
+ *priv = node;
+
+ return 0;
+}
+
+static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_sched_node *node, *tc_node;
+ struct ice_port_info *pi;
+
+ pi = ice_get_pi_from_dev_rate(rate_node);
+ tc_node = pi->root->children[0];
+ node = priv;
+
+ if (!rate_node->parent || !node || tc_node == node || !extack)
+ return 0;
+
+ if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+ return -EBUSY;
+
+ /* can't allow deleting a node with
children */ + if (node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf), + node, tx_max, extack); +} + +static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_share, extack); +} + +static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_priority, extack); +} + +static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_weight, extack); +} + +static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node), + node, tx_max, extack); +} + +static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node), + node, tx_share, extack); +} + +static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node), + node, tx_priority, extack); +} + +static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node), + node, tx_weight, extack); +} + +static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct ice_port_info *pi = 
ice_get_pi_from_dev_rate(devlink_rate); + struct ice_sched_node *tc_node, *node, *parent_node; + u16 num_nodes_added; + u32 first_node_teid; + u32 node_teid; + int status; + + tc_node = pi->root->children[0]; + node = priv; + + if (!extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink))) + return -EBUSY; + + if (!parent) { + if (!node || tc_node == node || node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; + } + + parent_node = parent_priv; + + /* if the node doesn't exist, create it */ + if (!node->parent) { + mutex_lock(&pi->sched_lock); + status = ice_sched_add_elems(pi, tc_node, parent_node, + parent_node->tx_sched_layer + 1, + 1, &num_nodes_added, &first_node_teid, + &node); + mutex_unlock(&pi->sched_lock); + + if (status) { + NL_SET_ERR_MSG_MOD(extack, "Can't add a new node"); + return status; + } + + if (devlink_rate->tx_share) + ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack); + if (devlink_rate->tx_max) + ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack); + if (devlink_rate->tx_priority) + ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack); + if (devlink_rate->tx_weight) + ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack); + } else { + node_teid = le32_to_cpu(node->info.node_teid); + mutex_lock(&pi->sched_lock); + status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent"); + } + + return status; +} + +/** + * ice_devlink_reinit_up - do reinit of the given PF + * @pf: pointer to the PF struct + */ +static int ice_devlink_reinit_up(struct ice_pf *pf) +{ + struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct ice_vsi_cfg_params params; + int err; + + err = ice_init_dev(pf); + if (err) + return err; + + params = ice_vsi_to_params(vsi); + params.flags = ICE_VSI_FLAG_INIT; + + rtnl_lock(); + err = ice_vsi_cfg(vsi, ¶ms); + rtnl_unlock(); + if (err) + goto err_vsi_cfg; + + /* No need to take devl_lock, it's already taken by devlink API */ + err = ice_load(pf); + if (err) + goto err_load; + + return 0; + +err_load: + rtnl_lock(); + ice_vsi_decfg(vsi); + rtnl_unlock(); +err_vsi_cfg: + ice_deinit_dev(pf); + return err; +} + +/** + * ice_devlink_reload_up - do reload up after reinit + * @devlink: pointer to the devlink instance reloading + * @action: the action requested + * @limit: limits imposed by userspace, such as not resetting + * @actions_performed: on return, indicate what actions actually performed + * @extack: netlink extended ACK structure + */ +static int +ice_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + return ice_devlink_reinit_up(pf); + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); + return ice_devlink_reload_empr_finish(pf, extack); + default: + WARN_ON(1); + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops ice_devlink_ops = { + .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + 
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_down = ice_devlink_reload_down, + .reload_up = ice_devlink_reload_up, + .eswitch_mode_get = ice_eswitch_mode_get, + .eswitch_mode_set = ice_eswitch_mode_set, + .info_get = ice_devlink_info_get, + .flash_update = ice_devlink_flash_update, + + .rate_node_new = ice_devlink_rate_node_new, + .rate_node_del = ice_devlink_rate_node_del, + + .rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set, + .rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set, + .rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set, + + .rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set, + .rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set, + .rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set, + .rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set, + + .rate_leaf_parent_set = ice_devlink_set_parent, + .rate_node_parent_set = ice_devlink_set_parent, +}; + +static int +ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + + ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false; + + return 0; +} + +static int +ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + bool roce_ena = ctx->val.vbool; + int ret; + + if (!roce_ena) { + ice_unplug_aux_dev(pf); + pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + return 0; + } + + pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; + ret = ice_plug_aux_dev(pf); + if (ret) + pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; + + return ret; +} + +static int +ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) { + NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + + ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP; + + return 0; +} + +static int +ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + bool iw_ena = ctx->val.vbool; + int ret; + + if (!iw_ena) { + ice_unplug_aux_dev(pf); + pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; + return 0; + } + + pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP; + ret = ice_plug_aux_dev(pf); + if (ret) + pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; + + return ret; +} + +static int +ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) { + NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. 
This device cannot enable iWARP and RoCEv2 simultaneously"); + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct devlink_param ice_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + ice_devlink_enable_roce_get, + ice_devlink_enable_roce_set, + ice_devlink_enable_roce_validate), + DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + ice_devlink_enable_iw_get, + ice_devlink_enable_iw_set, + ice_devlink_enable_iw_validate), + +}; + +static void ice_devlink_free(void *devlink_ptr) +{ + devlink_free((struct devlink *)devlink_ptr); +} + +/** + * ice_allocate_pf - Allocate devlink and return PF structure pointer + * @dev: the device to allocate for + * + * Allocate a devlink instance for this device and return the private area as + * the PF structure. The devlink memory is kept track of through devres by + * adding an action to remove it when unwinding. + */ +struct ice_pf *ice_allocate_pf(struct device *dev) +{ + struct devlink *devlink; + + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); + if (!devlink) + return NULL; + + /* Add an action to teardown the devlink when unwinding the driver */ + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) + return NULL; + + return devlink_priv(devlink); +} + +/** + * ice_devlink_register - Register devlink interface for this PF + * @pf: the PF to register the devlink for. + * + * Register the devlink instance associated with this physical function. + * + * Return: zero on success or an error code on failure. + */ +void ice_devlink_register(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + + devlink_register(devlink); +} + +/** + * ice_devlink_unregister - Unregister devlink resources for this PF. + * @pf: the PF structure to cleanup + * + * Releases resources used by devlink and cleans up associated memory. 
+ */ +void ice_devlink_unregister(struct ice_pf *pf) +{ + devlink_unregister(priv_to_devlink(pf)); +} + +/** + * ice_devlink_set_switch_id - Set unique switch id based on pci dsn + * @pf: the PF to create a devlink port for + * @ppid: struct with switch id information + */ +static void +ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid) +{ + struct pci_dev *pdev = pf->pdev; + u64 id; + + id = pci_get_dsn(pdev); + + ppid->id_len = sizeof(id); + put_unaligned_be64(id, &ppid->id); +} + +int ice_devlink_register_params(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + + return devlink_params_register(devlink, ice_devlink_params, + ARRAY_SIZE(ice_devlink_params)); +} + +void ice_devlink_unregister_params(struct ice_pf *pf) +{ + devlink_params_unregister(priv_to_devlink(pf), ice_devlink_params, + ARRAY_SIZE(ice_devlink_params)); +} + +/** + * ice_devlink_set_port_split_options - Set port split options + * @pf: the PF to set port split options + * @attrs: devlink attributes + * + * Sets devlink port split options based on available FW port options + */ +static void +ice_devlink_set_port_split_options(struct ice_pf *pf, + struct devlink_port_attrs *attrs) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", + status); + return; + } + + /* find the biggest available port split count */ + for (i = 0; i < option_count; i++) + attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); + + attrs->splittable = attrs->lanes ? 1 : 0; + ice_active_port_option = active_idx; +} + +static const struct devlink_port_ops ice_devlink_port_ops = { + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, +}; + +/** + * ice_devlink_create_pf_port - Create a devlink port for this PF + * @pf: the PF to create a devlink port for + * + * Create and register a devlink_port for this PF. + * This function has to be called under devl_lock. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_pf_port(struct ice_pf *pf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + int err; + + devlink = priv_to_devlink(pf); + + dev = ice_pf_to_dev(pf); + + devlink_port = &pf->devlink_port; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return -EIO; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pf->hw.bus.func; + + /* As FW supports only port split options for whole device, + * set port split options only for first PF. + */ + if (pf->hw.pf_id == 0) + ice_devlink_set_port_split_options(pf, &attrs); + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + + err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_ops); + if (err) { + dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", + pf->hw.pf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup + * + * Unregisters the devlink_port structure associated with this PF. 
+ * This function has to be called under devl_lock. + */ +void ice_devlink_destroy_pf_port(struct ice_pf *pf) +{ + devl_port_unregister(&pf->devlink_port); +} + +/** + * ice_devlink_create_vf_port - Create a devlink port for this VF + * @vf: the VF to create a port for + * + * Create and register a devlink_port for this VF. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_vf_port(struct ice_vf *vf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + struct ice_pf *pf; + int err; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + devlink_port = &vf->devlink_port; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.vf = vf->vf_id; + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + err = devlink_port_register(devlink, devlink_port, vsi->idx); + if (err) { + dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", + vf->vf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF + * @vf: the VF to cleanup + * + * Unregisters the devlink_port structure associated with this VF. + */ +void ice_devlink_destroy_vf_port(struct ice_vf *vf) +{ + devl_rate_leaf_destroy(&vf->devlink_port); + devlink_port_unregister(&vf->devlink_port); +} + +#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + +/** + * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents + * @devlink: the devlink instance + * @ops: the devlink region to snapshot + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. + * + * It captures a snapshot of the NVM or Shadow RAM flash contents. This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. + */ +static int ice_devlink_nvm_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; + u8 *nvm_data, *tmp, i; + u32 nvm_size, left; + s8 num_blks; + int status; + + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + + nvm_data = vzalloc(nvm_size); + if (!nvm_data) + return -ENOMEM; + + num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); + tmp = nvm_data; + left = nvm_size; + + /* Some systems take longer to read the NVM than others which causes the + * FW to reclaim the NVM lock before the entire NVM has been read. Fix + * this by breaking the reads of the NVM into smaller chunks that will + * probably not take as long. 
This has some overhead since we are + * increasing the number of AQ commands, but it should always work + */ + for (i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + vfree(nvm_data); + return -EIO; + } + + status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, + &read_sz, tmp, read_shadow_ram); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + read_sz, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + vfree(nvm_data); + return -EIO; + } + ice_release_nvm(hw); + + tmp += read_sz; + left -= read_sz; + } + + *data = nvm_data; + + return 0; +} + +/** + * ice_devlink_nvm_read - Read a portion of NVM flash contents + * @devlink: the devlink instance + * @ops: the devlink region to snapshot + * @extack: extended ACK response structure + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into + * + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. + */ +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; + u64 nvm_size; + int status; + + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + + if (offset + size >= nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + return -EIO; + } + ice_release_nvm(hw); + + return 0; +} + +/** + * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities + * @devlink: the devlink instance + * @ops: the devlink region being snapshotted + * @extack: extended ACK response structure + * @data: on exit points to snapshot data buffer + * + * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for + * the device-caps devlink region. It captures a snapshot of the device + * capabilities reported by firmware. + * + * @returns zero on success, and updates the data pointer. Returns a non-zero + * error code on failure. 
+ */ +static int +ice_devlink_devcaps_snapshot(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, u8 **data) +{ + struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + void *devcaps; + int status; + + devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN); + if (!devcaps) + return -ENOMEM; + + status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL, + ice_aqc_opc_list_dev_caps, NULL); + if (status) { + dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities"); + vfree(devcaps); + return status; + } + + *data = (u8 *)devcaps; + + return 0; +} + +static const struct devlink_region_ops ice_nvm_region_ops = { + .name = "nvm-flash", + .destructor = vfree, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, +}; + +static const struct devlink_region_ops ice_sram_region_ops = { + .name = "shadow-ram", + .destructor = vfree, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, +}; + +static const struct devlink_region_ops ice_devcaps_region_ops = { + .name = "device-caps", + .destructor = vfree, + .snapshot = ice_devlink_devcaps_snapshot, +}; + +/** + * ice_devlink_init_regions - Initialize devlink regions + * @pf: the PF device structure + * + * Create devlink regions used to enable access to dump the contents of the + * flash memory on the device. + */ +void ice_devlink_init_regions(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct device *dev = ice_pf_to_dev(pf); + u64 nvm_size, sram_size; + + nvm_size = pf->hw.flash.flash_size; + pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1, + nvm_size); + if (IS_ERR(pf->nvm_region)) { + dev_err(dev, "failed to create NVM devlink region, err %ld\n", + PTR_ERR(pf->nvm_region)); + pf->nvm_region = NULL; + } + + sram_size = pf->hw.flash.sr_words * 2u; + pf->sram_region = devlink_region_create(devlink, &ice_sram_region_ops, + 1, sram_size); + if (IS_ERR(pf->sram_region)) { + dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n", + PTR_ERR(pf->sram_region)); + pf->sram_region = NULL; + } + + pf->devcaps_region = devlink_region_create(devlink, + &ice_devcaps_region_ops, 10, + ICE_AQ_MAX_BUF_LEN); + if (IS_ERR(pf->devcaps_region)) { + dev_err(dev, "failed to create device-caps devlink region, err %ld\n", + PTR_ERR(pf->devcaps_region)); + pf->devcaps_region = NULL; + } +} + +/** + * ice_devlink_destroy_regions - Destroy devlink regions + * @pf: the PF device structure + * + * Remove previously created regions for this PF. + */ +void ice_devlink_destroy_regions(struct ice_pf *pf) +{ + if (pf->nvm_region) + devlink_region_destroy(pf->nvm_region); + + if (pf->sram_region) + devlink_region_destroy(pf->sram_region); + + if (pf->devcaps_region) + devlink_region_destroy(pf->devcaps_region); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.h b/drivers/net/ethernet/intel/ice/devlink/devlink.h new file mode 100644 index 000000000000..d291c0e2e17b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#ifndef _ICE_DEVLINK_H_ +#define _ICE_DEVLINK_H_ + +struct ice_pf *ice_allocate_pf(struct device *dev); + +void ice_devlink_register(struct ice_pf *pf); +void ice_devlink_unregister(struct ice_pf *pf); +int ice_devlink_register_params(struct ice_pf *pf); +void ice_devlink_unregister_params(struct ice_pf *pf); +int ice_devlink_create_pf_port(struct ice_pf *pf); +void ice_devlink_destroy_pf_port(struct ice_pf *pf); +int ice_devlink_create_vf_port(struct ice_vf *vf); +void ice_devlink_destroy_vf_port(struct ice_vf *vf); + +void ice_devlink_init_regions(struct ice_pf *pf); +void ice_devlink_destroy_regions(struct ice_pf *pf); + +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi); + +#endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index ceb17c004d79..a94e7072b570 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -3,7 +3,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c deleted file mode 100644 index 3c3616f0f811..000000000000 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ /dev/null @@ -1,2022 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2020, Intel Corporation. */ - -#include - -#include "ice.h" -#include "ice_lib.h" -#include "ice_devlink.h" -#include "ice_eswitch.h" -#include "ice_fw_update.h" -#include "ice_dcb_lib.h" - -static int ice_active_port_option = -1; - -/* context for devlink info version reporting */ -struct ice_info_ctx { - char buf[128]; - struct ice_orom_info pending_orom; - struct ice_nvm_info pending_nvm; - struct ice_netlist_info pending_netlist; - struct ice_hw_dev_caps dev_caps; -}; - -/* The following functions are used to format specific strings for various - * devlink info versions. The ctx parameter is used to provide the storage - * buffer, as well as any ancillary information calculated when the info - * request was made. - * - * If a version does not exist, for example when attempting to get the - * inactive version of flash when there is no pending update, the function - * should leave the buffer in the ctx structure empty. 
- */ - -static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - u8 dsn[8]; - - /* Copy the DSN into an array in Big Endian format */ - put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); - - snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); -} - -static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_hw *hw = &pf->hw; - int status; - - status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf)); - if (status) - /* We failed to locate the PBA, so just skip this entry */ - dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n", - status); -} - -static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_hw *hw = &pf->hw; - - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", - hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch); -} - -static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_hw *hw = &pf->hw; - - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, - hw->api_min_ver, hw->api_patch); -} - -static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_hw *hw = &pf->hw; - - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build); -} - -static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_orom_info *orom = &pf->hw.flash.orom; - - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", - orom->major, orom->build, orom->patch); -} - -static void -ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, - struct ice_info_ctx *ctx) -{ - struct ice_orom_info *orom = &ctx->pending_orom; - - if (ctx->dev_caps.common_cap.nvm_update_pending_orom) - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", - orom->major, orom->build, orom->patch); -} - -static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_nvm_info *nvm = &pf->hw.flash.nvm; - - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); -} - -static void -ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, - struct ice_info_ctx *ctx) -{ - struct ice_nvm_info *nvm = &ctx->pending_nvm; - - if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", - nvm->major, nvm->minor); -} - -static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_nvm_info *nvm = &pf->hw.flash.nvm; - - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); -} - -static void -ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_nvm_info *nvm = &ctx->pending_nvm; - - if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); -} - -static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_hw *hw = &pf->hw; - - snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name); -} - -static void -ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; - - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", - pkg->major, pkg->minor, pkg->update, pkg->draft); -} - -static void -ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id); -} - -static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_netlist_info *netlist = &pf->hw.flash.netlist; - - /* The netlist version fields are BCD 
formatted */ - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", - netlist->major, netlist->minor, - netlist->type >> 16, netlist->type & 0xFFFF, - netlist->rev, netlist->cust_ver); -} - -static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - struct ice_netlist_info *netlist = &pf->hw.flash.netlist; - - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); -} - -static void -ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, - struct ice_info_ctx *ctx) -{ - struct ice_netlist_info *netlist = &ctx->pending_netlist; - - /* The netlist version fields are BCD formatted */ - if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", - netlist->major, netlist->minor, - netlist->type >> 16, netlist->type & 0xFFFF, - netlist->rev, netlist->cust_ver); -} - -static void -ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, - struct ice_info_ctx *ctx) -{ - struct ice_netlist_info *netlist = &ctx->pending_netlist; - - if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) - snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); -} - -static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - u32 id, cfg_ver, fw_ver; - - if (!ice_is_feature_supported(pf, ICE_F_CGU)) - return; - if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver)) - return; - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver); -} - -static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx) -{ - if (!ice_is_feature_supported(pf, ICE_F_CGU)) - return; - snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number); -} - -#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } -#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } -#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } - -/* The combined() macro inserts both the running entry as well as a stored - * entry. The running entry will always report the version from the active - * handler. The stored entry will first try the pending handler, and fallback - * to the active handler if the pending function does not report a version. - * The pending handler should check the status of a pending update for the - * relevant flash component. It should only fill in the buffer in the case - * where a valid pending version is available. This ensures that the related - * stored and running versions remain in sync, and that stored versions are - * correctly reported as expected. 
- */ -#define combined(key, active, pending) \ - running(key, active), \ - stored(key, pending, active) - -enum ice_version_type { - ICE_VERSION_FIXED, - ICE_VERSION_RUNNING, - ICE_VERSION_STORED, -}; - -static const struct ice_devlink_version { - enum ice_version_type type; - const char *key; - void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); - void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); -} ice_devlink_versions[] = { - fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), - running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), - running("fw.mgmt.api", ice_info_fw_api), - running("fw.mgmt.build", ice_info_fw_build), - combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver), - combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver), - combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack), - running("fw.app.name", ice_info_ddp_pkg_name), - running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), - running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), - combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver), - combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build), - fixed("cgu.id", ice_info_cgu_id), - running("fw.cgu", ice_info_cgu_fw_build), -}; - -/** - * ice_devlink_info_get - .info_get devlink handler - * @devlink: devlink instance structure - * @req: the devlink info request - * @extack: extended netdev ack structure - * - * Callback for the devlink .info_get operation. Reports information about the - * device. - * - * Return: zero on success or an error code on failure. - */ -static int ice_devlink_info_get(struct devlink *devlink, - struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct ice_pf *pf = devlink_priv(devlink); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - struct ice_info_ctx *ctx; - size_t i; - int err; - - err = ice_wait_for_reset(pf, 10 * HZ); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting"); - return err; - } - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - /* discover capabilities first */ - err = ice_discover_dev_caps(hw, &ctx->dev_caps); - if (err) { - dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n", - err, ice_aq_str(hw->adminq.sq_last_status)); - NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities"); - goto out_free_ctx; - } - - if (ctx->dev_caps.common_cap.nvm_update_pending_orom) { - err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom); - if (err) { - dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n", - err, ice_aq_str(hw->adminq.sq_last_status)); - - /* disable display of pending Option ROM */ - ctx->dev_caps.common_cap.nvm_update_pending_orom = false; - } - } - - if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) { - err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm); - if (err) { - dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n", - err, ice_aq_str(hw->adminq.sq_last_status)); - - /* disable display of pending Option ROM */ - ctx->dev_caps.common_cap.nvm_update_pending_nvm = false; - } - } - - if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) { - err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist); - if (err) { - dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n", - err, 
ice_aq_str(hw->adminq.sq_last_status)); - - /* disable display of pending Option ROM */ - ctx->dev_caps.common_cap.nvm_update_pending_netlist = false; - } - } - - ice_info_get_dsn(pf, ctx); - - err = devlink_info_serial_number_put(req, ctx->buf); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number"); - goto out_free_ctx; - } - - for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) { - enum ice_version_type type = ice_devlink_versions[i].type; - const char *key = ice_devlink_versions[i].key; - - memset(ctx->buf, 0, sizeof(ctx->buf)); - - ice_devlink_versions[i].getter(pf, ctx); - - /* If the default getter doesn't report a version, use the - * fallback function. This is primarily useful in the case of - * "stored" versions that want to report the same value as the - * running version in the normal case of no pending update. - */ - if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) - ice_devlink_versions[i].fallback(pf, ctx); - - /* Do not report missing versions */ - if (ctx->buf[0] == '\0') - continue; - - switch (type) { - case ICE_VERSION_FIXED: - err = devlink_info_version_fixed_put(req, key, ctx->buf); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version"); - goto out_free_ctx; - } - break; - case ICE_VERSION_RUNNING: - err = devlink_info_version_running_put(req, key, ctx->buf); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set running version"); - goto out_free_ctx; - } - break; - case ICE_VERSION_STORED: - err = devlink_info_version_stored_put(req, key, ctx->buf); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version"); - goto out_free_ctx; - } - break; - } - } - -out_free_ctx: - kfree(ctx); - return err; -} - -/** - * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware - * @pf: pointer to the pf instance - * @extack: netlink extended ACK structure - * - * Allow user to activate new Embedded Management Processor firmware by - * issuing device specific EMP reset. Called in response to - * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE. - * - * Note that teardown and rebuild of the driver state happens automatically as - * part of an interrupt and watchdog task. This is because all physical - * functions on the device must be able to reset when an EMP reset occurs from - * any source. - */ -static int -ice_devlink_reload_empr_start(struct ice_pf *pf, - struct netlink_ext_ack *extack) -{ - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - u8 pending; - int err; - - err = ice_get_pending_updates(pf, &pending, extack); - if (err) - return err; - - /* pending is a bitmask of which flash banks have a pending update, - * including the main NVM bank, the Option ROM bank, and the netlist - * bank. If any of these bits are set, then there is a pending update - * waiting to be activated. - */ - if (!pending) { - NL_SET_ERR_MSG_MOD(extack, "No pending firmware update"); - return -ECANCELED; - } - - if (pf->fw_emp_reset_disabled) { - NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. 
To activate firmware, a reboot or power cycle is needed"); - return -ECANCELED; - } - - dev_dbg(dev, "Issuing device EMP reset to activate firmware\n"); - - err = ice_aq_nvm_update_empr(hw); - if (err) { - dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n", - err, ice_aq_str(hw->adminq.sq_last_status)); - NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware"); - return err; - } - - return 0; -} - -/** - * ice_devlink_reinit_down - unload given PF - * @pf: pointer to the PF struct - */ -static void ice_devlink_reinit_down(struct ice_pf *pf) -{ - /* No need to take devl_lock, it's already taken by devlink API */ - ice_unload(pf); - rtnl_lock(); - ice_vsi_decfg(ice_get_main_vsi(pf)); - rtnl_unlock(); - ice_deinit_dev(pf); -} - -/** - * ice_devlink_reload_down - prepare for reload - * @devlink: pointer to the devlink instance to reload - * @netns_change: if true, the network namespace is changing - * @action: the action to perform - * @limit: limits on what reload should do, such as not resetting - * @extack: netlink extended ACK structure - */ -static int -ice_devlink_reload_down(struct devlink *devlink, bool netns_change, - enum devlink_reload_action action, - enum devlink_reload_limit limit, - struct netlink_ext_ack *extack) -{ - struct ice_pf *pf = devlink_priv(devlink); - - switch (action) { - case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - if (ice_is_eswitch_mode_switchdev(pf)) { - NL_SET_ERR_MSG_MOD(extack, - "Go to legacy mode before doing reinit"); - return -EOPNOTSUPP; - } - if (ice_is_adq_active(pf)) { - NL_SET_ERR_MSG_MOD(extack, - "Turn off ADQ before doing reinit"); - return -EOPNOTSUPP; - } - if (ice_has_vfs(pf)) { - NL_SET_ERR_MSG_MOD(extack, - "Remove all VFs before doing reinit"); - return -EOPNOTSUPP; - } - ice_devlink_reinit_down(pf); - return 0; - case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: - return ice_devlink_reload_empr_start(pf, extack); - default: - WARN_ON(1); - return -EOPNOTSUPP; - } -} - -/** - * ice_devlink_reload_empr_finish - Wait for EMP reset to finish - * @pf: pointer to the pf instance - * @extack: netlink extended ACK structure - * - * Wait for driver to finish rebuilding after EMP reset is completed. This - * includes time to wait for both the actual device reset as well as the time - * for the driver's rebuild to complete. 
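The FW_ACTIVATE reload path above only proceeds when at least one flash bank holds a staged, inactive image and EMP reset has not been disabled by firmware. A minimal sketch of that gate, with hypothetical bank bits standing in for the driver's real nvm_update_pending_* capability flags:

/* Illustrative only: each bit marks a flash bank with a staged, inactive image. */
#define PENDING_BANK_NVM	0x1
#define PENDING_BANK_OROM	0x2
#define PENDING_BANK_NETLIST	0x4

static int fw_activate_allowed(unsigned int pending_banks, bool empr_disabled)
{
	if (!pending_banks)
		return -ECANCELED;	/* nothing staged, nothing to activate */
	if (empr_disabled)
		return -ECANCELED;	/* only a reboot or power cycle can activate it */
	return 0;
}

Once this gate passes, ice_aq_nvm_update_empr() requests the EMP reset, and the matching .reload_up step (ice_devlink_reload_empr_finish, whose body follows) simply waits up to 60 seconds for the device and driver to rebuild.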
- */ -static int -ice_devlink_reload_empr_finish(struct ice_pf *pf, - struct netlink_ext_ack *extack) -{ - int err; - - err = ice_wait_for_reset(pf, 60 * HZ); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute"); - return err; - } - - return 0; -} - -/** - * ice_devlink_port_opt_speed_str - convert speed to a string - * @speed: speed value - */ -static const char *ice_devlink_port_opt_speed_str(u8 speed) -{ - switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { - case ICE_AQC_PORT_OPT_MAX_LANE_100M: - return "0.1"; - case ICE_AQC_PORT_OPT_MAX_LANE_1G: - return "1"; - case ICE_AQC_PORT_OPT_MAX_LANE_2500M: - return "2.5"; - case ICE_AQC_PORT_OPT_MAX_LANE_5G: - return "5"; - case ICE_AQC_PORT_OPT_MAX_LANE_10G: - return "10"; - case ICE_AQC_PORT_OPT_MAX_LANE_25G: - return "25"; - case ICE_AQC_PORT_OPT_MAX_LANE_50G: - return "50"; - case ICE_AQC_PORT_OPT_MAX_LANE_100G: - return "100"; - } - - return "-"; -} - -#define ICE_PORT_OPT_DESC_LEN 50 -/** - * ice_devlink_port_options_print - Print available port split options - * @pf: the PF to print split port options - * - * Prints a table with available port split options and max port speeds - */ -static void ice_devlink_port_options_print(struct ice_pf *pf) -{ - u8 i, j, options_count, cnt, speed, pending_idx, active_idx; - struct ice_aqc_get_port_options_elem *options, *opt; - struct device *dev = ice_pf_to_dev(pf); - bool active_valid, pending_valid; - char desc[ICE_PORT_OPT_DESC_LEN]; - const char *str; - int status; - - options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, - sizeof(*options), GFP_KERNEL); - if (!options) - return; - - for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { - opt = options + i * ICE_AQC_PORT_OPT_MAX; - options_count = ICE_AQC_PORT_OPT_MAX; - active_valid = 0; - - status = ice_aq_get_port_options(&pf->hw, opt, &options_count, - i, true, &active_idx, - &active_valid, &pending_idx, - &pending_valid); - if (status) { - dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", - i, status); - goto err; - } - } - - dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); - dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); - dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); - - for (i = 0; i < options_count; i++) { - cnt = 0; - - if (i == ice_active_port_option) - str = "Active"; - else if ((i == pending_idx) && pending_valid) - str = "Pending"; - else - str = ""; - - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%-8s", str); - - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%-6u", options[i].pmd); - - for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { - speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; - str = ice_devlink_port_opt_speed_str(speed); - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%3s ", str); - } - - dev_dbg(dev, "%s\n", desc); - } - -err: - kfree(options); -} - -/** - * ice_devlink_aq_set_port_option - Send set port option admin queue command - * @pf: the PF to print split port options - * @option_idx: selected port option - * @extack: extended netdev ack structure - * - * Sends set port option admin queue command with selected port option and - * calls NVM write activate. 
- */ -static int -ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, - struct netlink_ext_ack *extack) -{ - struct device *dev = ice_pf_to_dev(pf); - int status; - - status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); - if (status) { - dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); - return -EIO; - } - - status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - return -EIO; - } - - status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); - if (status) { - dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); - ice_release_nvm(&pf->hw); - return -EIO; - } - - ice_release_nvm(&pf->hw); - - NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); - return 0; -} - -/** - * ice_devlink_port_split - .port_split devlink handler - * @devlink: devlink instance structure - * @port: devlink port structure - * @count: number of ports to split to - * @extack: extended netdev ack structure - * - * Callback for the devlink .port_split operation. - * - * Unfortunately, the devlink expression of available options is limited - * to just a number, so search for an FW port option which supports - * the specified number. As there could be multiple FW port options with - * the same port split count, allow switching between them. When the same - * port split count request is issued again, switch to the next FW port - * option with the same port split count. - * - * Return: zero on success or an error code on failure. - */ -static int -ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, - unsigned int count, struct netlink_ext_ack *extack) -{ - struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; - u8 i, j, active_idx, pending_idx, new_option; - struct ice_pf *pf = devlink_priv(devlink); - u8 option_count = ICE_AQC_PORT_OPT_MAX; - struct device *dev = ice_pf_to_dev(pf); - bool active_valid, pending_valid; - int status; - - status = ice_aq_get_port_options(&pf->hw, options, &option_count, - 0, true, &active_idx, &active_valid, - &pending_idx, &pending_valid); - if (status) { - dev_dbg(dev, "Couldn't read port split options, err = %d\n", - status); - NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); - return -EIO; - } - - new_option = ICE_AQC_PORT_OPT_MAX; - active_idx = pending_valid ? pending_idx : active_idx; - for (i = 1; i <= option_count; i++) { - /* In order to allow switching between FW port options with - * the same port split count, search for a new option starting - * from the active/pending option (with array wrap around). 
- */ - j = (active_idx + i) % option_count; - - if (count == options[j].pmd) { - new_option = j; - break; - } - } - - if (new_option == active_idx) { - dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", - count); - NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); - ice_devlink_port_options_print(pf); - return -EINVAL; - } - - if (new_option == ICE_AQC_PORT_OPT_MAX) { - dev_dbg(dev, "request to split: count: %u not found\n", count); - NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); - ice_devlink_port_options_print(pf); - return -EINVAL; - } - - status = ice_devlink_aq_set_port_option(pf, new_option, extack); - if (status) - return status; - - ice_devlink_port_options_print(pf); - - return 0; -} - -/** - * ice_devlink_port_unsplit - .port_unsplit devlink handler - * @devlink: devlink instance structure - * @port: devlink port structure - * @extack: extended netdev ack structure - * - * Callback for the devlink .port_unsplit operation. - * Calls ice_devlink_port_split with split count set to 1. - * There could be no FW option available with split count 1. - * - * Return: zero on success or an error code on failure. - */ -static int -ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, - struct netlink_ext_ack *extack) -{ - return ice_devlink_port_split(devlink, port, 1, extack); -} - -/** - * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree - * @pf: pf struct - * - * This function tears down tree exported during VF's creation. - */ -void ice_tear_down_devlink_rate_tree(struct ice_pf *pf) -{ - struct devlink *devlink; - struct ice_vf *vf; - unsigned int bkt; - - devlink = priv_to_devlink(pf); - - devl_lock(devlink); - mutex_lock(&pf->vfs.table_lock); - ice_for_each_vf(pf, bkt, vf) { - if (vf->devlink_port.devlink_rate) - devl_rate_leaf_destroy(&vf->devlink_port); - } - mutex_unlock(&pf->vfs.table_lock); - - devl_rate_nodes_destroy(devlink); - devl_unlock(devlink); -} - -/** - * ice_enable_custom_tx - try to enable custom Tx feature - * @pf: pf struct - * - * This function tries to enable custom Tx feature, - * it's not possible to enable it, if DCB or ADQ is active. - */ -static bool ice_enable_custom_tx(struct ice_pf *pf) -{ - struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info; - struct device *dev = ice_pf_to_dev(pf); - - if (pi->is_custom_tx_enabled) - /* already enabled, return true */ - return true; - - if (ice_is_adq_active(pf)) { - dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n"); - return false; - } - - if (ice_is_dcb_active(pf)) { - dev_err(dev, "DCB active, can't modify Tx scheduler tree\n"); - return false; - } - - pi->is_custom_tx_enabled = true; - - return true; -} - -/** - * ice_traverse_tx_tree - traverse Tx scheduler tree - * @devlink: devlink struct - * @node: current node, used for recursion - * @tc_node: tc_node struct, that is treated as a root - * @pf: pf struct - * - * This function traverses Tx scheduler tree and exports - * entire structure to the devlink-rate. 
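The .port_split handler above resolves the requested lane count by scanning the FW option table circularly, starting just past the active (or pending) option, so issuing the same split count again cycles through equivalent FW options. A self-contained sketch of that search, using a plain array of per-option lane counts rather than the real ice_aqc_get_port_options_elem struct:

/* Return the index of the next option whose lane count equals 'count',
 * searching circularly from just after 'active_idx'; return 'n' if none match.
 */
static unsigned int next_matching_option(const unsigned char *pmd, unsigned int n,
					 unsigned int active_idx, unsigned int count)
{
	unsigned int i, j;

	for (i = 1; i <= n; i++) {
		j = (active_idx + i) % n;
		if (pmd[j] == count)
			return j;
	}
	return n;
}

Note that the last index probed is active_idx itself, which is how the handler distinguishes "already set, no alternative option" from "no option supports this count".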
- */ -static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node, - struct ice_sched_node *tc_node, struct ice_pf *pf) -{ - struct devlink_rate *rate_node = NULL; - struct ice_vf *vf; - int i; - - if (node->rate_node) - /* already added, skip to the next */ - goto traverse_children; - - if (node->parent == tc_node) { - /* create root node */ - rate_node = devl_rate_node_create(devlink, node, node->name, NULL); - } else if (node->vsi_handle && - pf->vsi[node->vsi_handle]->vf) { - vf = pf->vsi[node->vsi_handle]->vf; - if (!vf->devlink_port.devlink_rate) - /* leaf nodes doesn't have children - * so we don't set rate_node - */ - devl_rate_leaf_create(&vf->devlink_port, node, - node->parent->rate_node); - } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF && - node->parent->rate_node) { - rate_node = devl_rate_node_create(devlink, node, node->name, - node->parent->rate_node); - } - - if (rate_node && !IS_ERR(rate_node)) - node->rate_node = rate_node; - -traverse_children: - for (i = 0; i < node->num_children; i++) - ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); -} - -/** - * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate - * @devlink: devlink struct - * @vsi: main vsi struct - * - * This function finds a root node, then calls ice_traverse_tx tree, which - * traverses the tree and exports it's contents to devlink rate. - */ -int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi) -{ - struct ice_port_info *pi = vsi->port_info; - struct ice_sched_node *tc_node; - struct ice_pf *pf = vsi->back; - int i; - - tc_node = pi->root->children[0]; - mutex_lock(&pi->sched_lock); - devl_lock(devlink); - for (i = 0; i < tc_node->num_children; i++) - ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf); - devl_unlock(devlink); - mutex_unlock(&pi->sched_lock); - - return 0; -} - -static void ice_clear_rate_nodes(struct ice_sched_node *node) -{ - node->rate_node = NULL; - - for (int i = 0; i < node->num_children; i++) - ice_clear_rate_nodes(node->children[i]); -} - -/** - * ice_devlink_rate_clear_tx_topology - clear node->rate_node - * @vsi: main vsi struct - * - * Clear rate_node to cleanup creation of Tx topology. - * - */ -void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi) -{ - struct ice_port_info *pi = vsi->port_info; - - mutex_lock(&pi->sched_lock); - ice_clear_rate_nodes(pi->root->children[0]); - mutex_unlock(&pi->sched_lock); -} - -/** - * ice_set_object_tx_share - sets node scheduling parameter - * @pi: devlink struct instance - * @node: node struct instance - * @bw: bandwidth in bytes per second - * @extack: extended netdev ack structure - * - * This function sets ICE_MIN_BW scheduling BW limit. - */ -static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node, - u64 bw, struct netlink_ext_ack *extack) -{ - int status; - - mutex_lock(&pi->sched_lock); - /* converts bytes per second to kilo bits per second */ - node->tx_share = div_u64(bw, 125); - status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share); - mutex_unlock(&pi->sched_lock); - - if (status) - NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share"); - - return status; -} - -/** - * ice_set_object_tx_max - sets node scheduling parameter - * @pi: devlink struct instance - * @node: node struct instance - * @bw: bandwidth in bytes per second - * @extack: extended netdev ack structure - * - * This function sets ICE_MAX_BW scheduling BW limit. 
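The tx_share setter above (and the tx_max setter whose body follows) receives a rate from devlink in bytes per second and programs the scheduler in kilobits per second, which is why both divide by 125: 1 kbit/s is 1000 bit/s, i.e. 125 bytes/s. A tiny standalone illustration of the conversion (the helper name is made up):

static inline unsigned long long bytes_per_sec_to_kbps(unsigned long long bw)
{
	/* 1 kbit/s = 1000 bit/s = 125 bytes/s */
	return bw / 125;
}

/* Example: a 10 Gbit/s limit arrives as 1250000000 bytes/s and is
 * programmed as 10000000 kbps.
 */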
- */ -static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node, - u64 bw, struct netlink_ext_ack *extack) -{ - int status; - - mutex_lock(&pi->sched_lock); - /* converts bytes per second value to kilo bits per second */ - node->tx_max = div_u64(bw, 125); - status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max); - mutex_unlock(&pi->sched_lock); - - if (status) - NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max"); - - return status; -} - -/** - * ice_set_object_tx_priority - sets node scheduling parameter - * @pi: devlink struct instance - * @node: node struct instance - * @priority: value representing priority for strict priority arbitration - * @extack: extended netdev ack structure - * - * This function sets priority of node among siblings. - */ -static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node, - u32 priority, struct netlink_ext_ack *extack) -{ - int status; - - if (priority >= 8) { - NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); - return -EINVAL; - } - - mutex_lock(&pi->sched_lock); - node->tx_priority = priority; - status = ice_sched_set_node_priority(pi, node, node->tx_priority); - mutex_unlock(&pi->sched_lock); - - if (status) - NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority"); - - return status; -} - -/** - * ice_set_object_tx_weight - sets node scheduling parameter - * @pi: devlink struct instance - * @node: node struct instance - * @weight: value represeting relative weight for WFQ arbitration - * @extack: extended netdev ack structure - * - * This function sets node weight for WFQ algorithm. - */ -static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node, - u32 weight, struct netlink_ext_ack *extack) -{ - int status; - - if (weight > 200 || weight < 1) { - NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); - return -EINVAL; - } - - mutex_lock(&pi->sched_lock); - node->tx_weight = weight; - status = ice_sched_set_node_weight(pi, node, node->tx_weight); - mutex_unlock(&pi->sched_lock); - - if (status) - NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight"); - - return status; -} - -/** - * ice_get_pi_from_dev_rate - get port info from devlink_rate - * @rate_node: devlink struct instance - * - * This function returns corresponding port_info struct of devlink_rate - */ -static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node) -{ - struct ice_pf *pf = devlink_priv(rate_node->devlink); - - return ice_get_main_vsi(pf)->port_info; -} - -static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, - struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node; - struct ice_port_info *pi; - - pi = ice_get_pi_from_dev_rate(rate_node); - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - /* preallocate memory for ice_sched_node */ - node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); - *priv = node; - - return 0; -} - -static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, - struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node, *tc_node; - struct ice_port_info *pi; - - pi = ice_get_pi_from_dev_rate(rate_node); - tc_node = pi->root->children[0]; - node = priv; - - if (!rate_node->parent || !node || tc_node == node || !extack) - return 0; - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - /* can't allow to delete a node with 
children */ - if (node->num_children) - return -EINVAL; - - mutex_lock(&pi->sched_lock); - ice_free_sched_node(pi, node); - mutex_unlock(&pi->sched_lock); - - return 0; -} - -static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, - u64 tx_max, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf), - node, tx_max, extack); -} - -static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, - u64 tx_share, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node, - tx_share, extack); -} - -static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv, - u32 tx_priority, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node, - tx_priority, extack); -} - -static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv, - u32 tx_weight, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node, - tx_weight, extack); -} - -static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, - u64 tx_max, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node), - node, tx_max, extack); -} - -static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, - u64 tx_share, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node), - node, tx_share, extack); -} - -static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv, - u32 tx_priority, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node), - node, tx_priority, extack); -} - -static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv, - u32 tx_weight, struct netlink_ext_ack *extack) -{ - struct ice_sched_node *node = priv; - - if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) - return -EBUSY; - - if (!node) - return 0; - - return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node), - node, tx_weight, extack); -} - -static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack) -{ - struct ice_port_info *pi = 
ice_get_pi_from_dev_rate(devlink_rate); - struct ice_sched_node *tc_node, *node, *parent_node; - u16 num_nodes_added; - u32 first_node_teid; - u32 node_teid; - int status; - - tc_node = pi->root->children[0]; - node = priv; - - if (!extack) - return 0; - - if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink))) - return -EBUSY; - - if (!parent) { - if (!node || tc_node == node || node->num_children) - return -EINVAL; - - mutex_lock(&pi->sched_lock); - ice_free_sched_node(pi, node); - mutex_unlock(&pi->sched_lock); - - return 0; - } - - parent_node = parent_priv; - - /* if the node doesn't exist, create it */ - if (!node->parent) { - mutex_lock(&pi->sched_lock); - status = ice_sched_add_elems(pi, tc_node, parent_node, - parent_node->tx_sched_layer + 1, - 1, &num_nodes_added, &first_node_teid, - &node); - mutex_unlock(&pi->sched_lock); - - if (status) { - NL_SET_ERR_MSG_MOD(extack, "Can't add a new node"); - return status; - } - - if (devlink_rate->tx_share) - ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack); - if (devlink_rate->tx_max) - ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack); - if (devlink_rate->tx_priority) - ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack); - if (devlink_rate->tx_weight) - ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack); - } else { - node_teid = le32_to_cpu(node->info.node_teid); - mutex_lock(&pi->sched_lock); - status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid); - mutex_unlock(&pi->sched_lock); - - if (status) - NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent"); - } - - return status; -} - -/** - * ice_devlink_reinit_up - do reinit of the given PF - * @pf: pointer to the PF struct - */ -static int ice_devlink_reinit_up(struct ice_pf *pf) -{ - struct ice_vsi *vsi = ice_get_main_vsi(pf); - struct ice_vsi_cfg_params params; - int err; - - err = ice_init_dev(pf); - if (err) - return err; - - params = ice_vsi_to_params(vsi); - params.flags = ICE_VSI_FLAG_INIT; - - rtnl_lock(); - err = ice_vsi_cfg(vsi, ¶ms); - rtnl_unlock(); - if (err) - goto err_vsi_cfg; - - /* No need to take devl_lock, it's already taken by devlink API */ - err = ice_load(pf); - if (err) - goto err_load; - - return 0; - -err_load: - rtnl_lock(); - ice_vsi_decfg(vsi); - rtnl_unlock(); -err_vsi_cfg: - ice_deinit_dev(pf); - return err; -} - -/** - * ice_devlink_reload_up - do reload up after reinit - * @devlink: pointer to the devlink instance reloading - * @action: the action requested - * @limit: limits imposed by userspace, such as not resetting - * @actions_performed: on return, indicate what actions actually performed - * @extack: netlink extended ACK structure - */ -static int -ice_devlink_reload_up(struct devlink *devlink, - enum devlink_reload_action action, - enum devlink_reload_limit limit, - u32 *actions_performed, - struct netlink_ext_ack *extack) -{ - struct ice_pf *pf = devlink_priv(devlink); - - switch (action) { - case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return ice_devlink_reinit_up(pf); - case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: - *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE); - return ice_devlink_reload_empr_finish(pf, extack); - default: - WARN_ON(1); - return -EOPNOTSUPP; - } -} - -static const struct devlink_ops ice_devlink_ops = { - .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, - .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | - 
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), - .reload_down = ice_devlink_reload_down, - .reload_up = ice_devlink_reload_up, - .eswitch_mode_get = ice_eswitch_mode_get, - .eswitch_mode_set = ice_eswitch_mode_set, - .info_get = ice_devlink_info_get, - .flash_update = ice_devlink_flash_update, - - .rate_node_new = ice_devlink_rate_node_new, - .rate_node_del = ice_devlink_rate_node_del, - - .rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set, - .rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set, - .rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set, - .rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set, - - .rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set, - .rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set, - .rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set, - .rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set, - - .rate_leaf_parent_set = ice_devlink_set_parent, - .rate_node_parent_set = ice_devlink_set_parent, -}; - -static int -ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct ice_pf *pf = devlink_priv(devlink); - - ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false; - - return 0; -} - -static int -ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct ice_pf *pf = devlink_priv(devlink); - bool roce_ena = ctx->val.vbool; - int ret; - - if (!roce_ena) { - ice_unplug_aux_dev(pf); - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; - return 0; - } - - pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; - ret = ice_plug_aux_dev(pf); - if (ret) - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2; - - return ret; -} - -static int -ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - struct ice_pf *pf = devlink_priv(devlink); - - if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) - return -EOPNOTSUPP; - - if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) { - NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously"); - return -EOPNOTSUPP; - } - - return 0; -} - -static int -ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct ice_pf *pf = devlink_priv(devlink); - - ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP; - - return 0; -} - -static int -ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct ice_pf *pf = devlink_priv(devlink); - bool iw_ena = ctx->val.vbool; - int ret; - - if (!iw_ena) { - ice_unplug_aux_dev(pf); - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; - return 0; - } - - pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP; - ret = ice_plug_aux_dev(pf); - if (ret) - pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP; - - return ret; -} - -static int -ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - struct ice_pf *pf = devlink_priv(devlink); - - if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) - return -EOPNOTSUPP; - - if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) { - NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. 
This device cannot enable iWARP and RoCEv2 simultaneously"); - return -EOPNOTSUPP; - } - - return 0; -} - -static const struct devlink_param ice_devlink_params[] = { - DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME), - ice_devlink_enable_roce_get, - ice_devlink_enable_roce_set, - ice_devlink_enable_roce_validate), - DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME), - ice_devlink_enable_iw_get, - ice_devlink_enable_iw_set, - ice_devlink_enable_iw_validate), - -}; - -static void ice_devlink_free(void *devlink_ptr) -{ - devlink_free((struct devlink *)devlink_ptr); -} - -/** - * ice_allocate_pf - Allocate devlink and return PF structure pointer - * @dev: the device to allocate for - * - * Allocate a devlink instance for this device and return the private area as - * the PF structure. The devlink memory is kept track of through devres by - * adding an action to remove it when unwinding. - */ -struct ice_pf *ice_allocate_pf(struct device *dev) -{ - struct devlink *devlink; - - devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); - if (!devlink) - return NULL; - - /* Add an action to teardown the devlink when unwinding the driver */ - if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) - return NULL; - - return devlink_priv(devlink); -} - -/** - * ice_devlink_register - Register devlink interface for this PF - * @pf: the PF to register the devlink for. - * - * Register the devlink instance associated with this physical function. - * - * Return: zero on success or an error code on failure. - */ -void ice_devlink_register(struct ice_pf *pf) -{ - struct devlink *devlink = priv_to_devlink(pf); - - devlink_register(devlink); -} - -/** - * ice_devlink_unregister - Unregister devlink resources for this PF. - * @pf: the PF structure to cleanup - * - * Releases resources used by devlink and cleans up associated memory. 
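The enable_roce and enable_iwarp runtime parameters registered above are mutually exclusive: each validate callback rejects the change while the other protocol's bit is already set in pf->rdma_mode. A reduced sketch of that check, with short stand-in names for IIDC_RDMA_PROTOCOL_ROCEV2 and IIDC_RDMA_PROTOCOL_IWARP:

#define RDMA_ROCEV2	0x1
#define RDMA_IWARP	0x2

/* Refuse to enable one RDMA protocol while the other is active. */
static int validate_rdma_proto(unsigned int rdma_mode, unsigned int want)
{
	unsigned int other = (want == RDMA_ROCEV2) ? RDMA_IWARP : RDMA_ROCEV2;

	return (rdma_mode & other) ? -EOPNOTSUPP : 0;
}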
- */ -void ice_devlink_unregister(struct ice_pf *pf) -{ - devlink_unregister(priv_to_devlink(pf)); -} - -/** - * ice_devlink_set_switch_id - Set unique switch id based on pci dsn - * @pf: the PF to create a devlink port for - * @ppid: struct with switch id information - */ -static void -ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid) -{ - struct pci_dev *pdev = pf->pdev; - u64 id; - - id = pci_get_dsn(pdev); - - ppid->id_len = sizeof(id); - put_unaligned_be64(id, &ppid->id); -} - -int ice_devlink_register_params(struct ice_pf *pf) -{ - struct devlink *devlink = priv_to_devlink(pf); - - return devlink_params_register(devlink, ice_devlink_params, - ARRAY_SIZE(ice_devlink_params)); -} - -void ice_devlink_unregister_params(struct ice_pf *pf) -{ - devlink_params_unregister(priv_to_devlink(pf), ice_devlink_params, - ARRAY_SIZE(ice_devlink_params)); -} - -/** - * ice_devlink_set_port_split_options - Set port split options - * @pf: the PF to set port split options - * @attrs: devlink attributes - * - * Sets devlink port split options based on available FW port options - */ -static void -ice_devlink_set_port_split_options(struct ice_pf *pf, - struct devlink_port_attrs *attrs) -{ - struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; - u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; - bool active_valid, pending_valid; - int status; - - status = ice_aq_get_port_options(&pf->hw, options, &option_count, - 0, true, &active_idx, &active_valid, - &pending_idx, &pending_valid); - if (status) { - dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", - status); - return; - } - - /* find the biggest available port split count */ - for (i = 0; i < option_count; i++) - attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); - - attrs->splittable = attrs->lanes ? 1 : 0; - ice_active_port_option = active_idx; -} - -static const struct devlink_port_ops ice_devlink_port_ops = { - .port_split = ice_devlink_port_split, - .port_unsplit = ice_devlink_port_unsplit, -}; - -/** - * ice_devlink_create_pf_port - Create a devlink port for this PF - * @pf: the PF to create a devlink port for - * - * Create and register a devlink_port for this PF. - * This function has to be called under devl_lock. - * - * Return: zero on success or an error code on failure. - */ -int ice_devlink_create_pf_port(struct ice_pf *pf) -{ - struct devlink_port_attrs attrs = {}; - struct devlink_port *devlink_port; - struct devlink *devlink; - struct ice_vsi *vsi; - struct device *dev; - int err; - - devlink = priv_to_devlink(pf); - - dev = ice_pf_to_dev(pf); - - devlink_port = &pf->devlink_port; - - vsi = ice_get_main_vsi(pf); - if (!vsi) - return -EIO; - - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; - - /* As FW supports only port split options for whole device, - * set port split options only for first PF. - */ - if (pf->hw.pf_id == 0) - ice_devlink_set_port_split_options(pf, &attrs); - - ice_devlink_set_switch_id(pf, &attrs.switch_id); - - devlink_port_attrs_set(devlink_port, &attrs); - - err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, - &ice_devlink_port_ops); - if (err) { - dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", - pf->hw.pf_id, err); - return err; - } - - return 0; -} - -/** - * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF - * @pf: the PF to cleanup - * - * Unregisters the devlink_port structure associated with this PF. 
- * This function has to be called under devl_lock. - */ -void ice_devlink_destroy_pf_port(struct ice_pf *pf) -{ - devl_port_unregister(&pf->devlink_port); -} - -/** - * ice_devlink_create_vf_port - Create a devlink port for this VF - * @vf: the VF to create a port for - * - * Create and register a devlink_port for this VF. - * - * Return: zero on success or an error code on failure. - */ -int ice_devlink_create_vf_port(struct ice_vf *vf) -{ - struct devlink_port_attrs attrs = {}; - struct devlink_port *devlink_port; - struct devlink *devlink; - struct ice_vsi *vsi; - struct device *dev; - struct ice_pf *pf; - int err; - - pf = vf->pf; - dev = ice_pf_to_dev(pf); - devlink_port = &vf->devlink_port; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - - attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; - attrs.pci_vf.vf = vf->vf_id; - - ice_devlink_set_switch_id(pf, &attrs.switch_id); - - devlink_port_attrs_set(devlink_port, &attrs); - devlink = priv_to_devlink(pf); - - err = devlink_port_register(devlink, devlink_port, vsi->idx); - if (err) { - dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", - vf->vf_id, err); - return err; - } - - return 0; -} - -/** - * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF - * @vf: the VF to cleanup - * - * Unregisters the devlink_port structure associated with this VF. - */ -void ice_devlink_destroy_vf_port(struct ice_vf *vf) -{ - devl_rate_leaf_destroy(&vf->devlink_port); - devlink_port_unregister(&vf->devlink_port); -} - -#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) - -static const struct devlink_region_ops ice_nvm_region_ops; -static const struct devlink_region_ops ice_sram_region_ops; - -/** - * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents - * @devlink: the devlink instance - * @ops: the devlink region to snapshot - * @extack: extended ACK response structure - * @data: on exit points to snapshot data buffer - * - * This function is called in response to a DEVLINK_CMD_REGION_NEW for either - * the nvm-flash or shadow-ram region. - * - * It captures a snapshot of the NVM or Shadow RAM flash contents. This - * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink - * interface. - * - * @returns zero on success, and updates the data pointer. Returns a non-zero - * error code on failure. - */ -static int ice_devlink_nvm_snapshot(struct devlink *devlink, - const struct devlink_region_ops *ops, - struct netlink_ext_ack *extack, u8 **data) -{ - struct ice_pf *pf = devlink_priv(devlink); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - bool read_shadow_ram; - u8 *nvm_data, *tmp, i; - u32 nvm_size, left; - s8 num_blks; - int status; - - if (ops == &ice_nvm_region_ops) { - read_shadow_ram = false; - nvm_size = hw->flash.flash_size; - } else if (ops == &ice_sram_region_ops) { - read_shadow_ram = true; - nvm_size = hw->flash.sr_words * 2u; - } else { - NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); - return -EOPNOTSUPP; - } - - nvm_data = vzalloc(nvm_size); - if (!nvm_data) - return -ENOMEM; - - num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); - tmp = nvm_data; - left = nvm_size; - - /* Some systems take longer to read the NVM than others which causes the - * FW to reclaim the NVM lock before the entire NVM has been read. Fix - * this by breaking the reads of the NVM into smaller chunks that will - * probably not take as long. 
This has some overhead since we are - * increasing the number of AQ commands, but it should always work - */ - for (i = 0; i < num_blks; i++) { - u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); - - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(nvm_data); - return -EIO; - } - - status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, - &read_sz, tmp, read_shadow_ram); - if (status) { - dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - read_sz, status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); - ice_release_nvm(hw); - vfree(nvm_data); - return -EIO; - } - ice_release_nvm(hw); - - tmp += read_sz; - left -= read_sz; - } - - *data = nvm_data; - - return 0; -} - -/** - * ice_devlink_nvm_read - Read a portion of NVM flash contents - * @devlink: the devlink instance - * @ops: the devlink region to snapshot - * @extack: extended ACK response structure - * @offset: the offset to start at - * @size: the amount to read - * @data: the data buffer to read into - * - * This function is called in response to DEVLINK_CMD_REGION_READ to directly - * read a section of the NVM contents. - * - * It reads from either the nvm-flash or shadow-ram region contents. - * - * @returns zero on success, and updates the data pointer. Returns a non-zero - * error code on failure. - */ -static int ice_devlink_nvm_read(struct devlink *devlink, - const struct devlink_region_ops *ops, - struct netlink_ext_ack *extack, - u64 offset, u32 size, u8 *data) -{ - struct ice_pf *pf = devlink_priv(devlink); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - bool read_shadow_ram; - u64 nvm_size; - int status; - - if (ops == &ice_nvm_region_ops) { - read_shadow_ram = false; - nvm_size = hw->flash.flash_size; - } else if (ops == &ice_sram_region_ops) { - read_shadow_ram = true; - nvm_size = hw->flash.sr_words * 2u; - } else { - NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); - return -EOPNOTSUPP; - } - - if (offset + size >= nvm_size) { - NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); - return -ERANGE; - } - - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - return -EIO; - } - - status = ice_read_flat_nvm(hw, (u32)offset, &size, data, - read_shadow_ram); - if (status) { - dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - size, status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); - ice_release_nvm(hw); - return -EIO; - } - ice_release_nvm(hw); - - return 0; -} - -/** - * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities - * @devlink: the devlink instance - * @ops: the devlink region being snapshotted - * @extack: extended ACK response structure - * @data: on exit points to snapshot data buffer - * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the device-caps devlink region. It captures a snapshot of the device - * capabilities reported by firmware. - * - * @returns zero on success, and updates the data pointer. Returns a non-zero - * error code on failure. 
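Returning to the NVM and Shadow RAM snapshot above: the flash is read in ICE_DEVLINK_READ_BLK_SIZE (1 MiB) chunks, taking and releasing the NVM semaphore around each chunk so slow platforms never hold the lock long enough for firmware to reclaim it. A stripped-down sketch of that loop, with error handling omitted:

	u32 left = nvm_size, offset = 0;
	u8 *dst = nvm_data;

	while (left) {
		u32 len = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);

		ice_acquire_nvm(hw, ICE_RES_READ);	/* per-chunk lock */
		ice_read_flat_nvm(hw, offset, &len, dst, read_shadow_ram);
		ice_release_nvm(hw);

		dst += len;
		offset += len;
		left -= len;
	}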
- */ -static int -ice_devlink_devcaps_snapshot(struct devlink *devlink, - const struct devlink_region_ops *ops, - struct netlink_ext_ack *extack, u8 **data) -{ - struct ice_pf *pf = devlink_priv(devlink); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - void *devcaps; - int status; - - devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN); - if (!devcaps) - return -ENOMEM; - - status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL, - ice_aqc_opc_list_dev_caps, NULL); - if (status) { - dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities"); - vfree(devcaps); - return status; - } - - *data = (u8 *)devcaps; - - return 0; -} - -static const struct devlink_region_ops ice_nvm_region_ops = { - .name = "nvm-flash", - .destructor = vfree, - .snapshot = ice_devlink_nvm_snapshot, - .read = ice_devlink_nvm_read, -}; - -static const struct devlink_region_ops ice_sram_region_ops = { - .name = "shadow-ram", - .destructor = vfree, - .snapshot = ice_devlink_nvm_snapshot, - .read = ice_devlink_nvm_read, -}; - -static const struct devlink_region_ops ice_devcaps_region_ops = { - .name = "device-caps", - .destructor = vfree, - .snapshot = ice_devlink_devcaps_snapshot, -}; - -/** - * ice_devlink_init_regions - Initialize devlink regions - * @pf: the PF device structure - * - * Create devlink regions used to enable access to dump the contents of the - * flash memory on the device. - */ -void ice_devlink_init_regions(struct ice_pf *pf) -{ - struct devlink *devlink = priv_to_devlink(pf); - struct device *dev = ice_pf_to_dev(pf); - u64 nvm_size, sram_size; - - nvm_size = pf->hw.flash.flash_size; - pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1, - nvm_size); - if (IS_ERR(pf->nvm_region)) { - dev_err(dev, "failed to create NVM devlink region, err %ld\n", - PTR_ERR(pf->nvm_region)); - pf->nvm_region = NULL; - } - - sram_size = pf->hw.flash.sr_words * 2u; - pf->sram_region = devlink_region_create(devlink, &ice_sram_region_ops, - 1, sram_size); - if (IS_ERR(pf->sram_region)) { - dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n", - PTR_ERR(pf->sram_region)); - pf->sram_region = NULL; - } - - pf->devcaps_region = devlink_region_create(devlink, - &ice_devcaps_region_ops, 10, - ICE_AQ_MAX_BUF_LEN); - if (IS_ERR(pf->devcaps_region)) { - dev_err(dev, "failed to create device-caps devlink region, err %ld\n", - PTR_ERR(pf->devcaps_region)); - pf->devcaps_region = NULL; - } -} - -/** - * ice_devlink_destroy_regions - Destroy devlink regions - * @pf: the PF device structure - * - * Remove previously created regions for this PF. - */ -void ice_devlink_destroy_regions(struct ice_pf *pf) -{ - if (pf->nvm_region) - devlink_region_destroy(pf->nvm_region); - - if (pf->sram_region) - devlink_region_destroy(pf->sram_region); - - if (pf->devcaps_region) - devlink_region_destroy(pf->devcaps_region); -} diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h deleted file mode 100644 index d291c0e2e17b..000000000000 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2019, Intel Corporation. 
*/ - -#ifndef _ICE_DEVLINK_H_ -#define _ICE_DEVLINK_H_ - -struct ice_pf *ice_allocate_pf(struct device *dev); - -void ice_devlink_register(struct ice_pf *pf); -void ice_devlink_unregister(struct ice_pf *pf); -int ice_devlink_register_params(struct ice_pf *pf); -void ice_devlink_unregister_params(struct ice_pf *pf); -int ice_devlink_create_pf_port(struct ice_pf *pf); -void ice_devlink_destroy_pf_port(struct ice_pf *pf); -int ice_devlink_create_vf_port(struct ice_vf *vf); -void ice_devlink_destroy_vf_port(struct ice_vf *vf); - -void ice_devlink_init_regions(struct ice_pf *pf); -void ice_devlink_destroy_regions(struct ice_pf *pf); - -int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); -void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); -void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi); - -#endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index af4e9530eb48..c902848cf88e 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -7,7 +7,7 @@ #include "ice_eswitch_br.h" #include "ice_fltr.h" #include "ice_repr.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" #include "ice_tc_lib.h" /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 327de754922c..66eeaf345032 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -7,7 +7,7 @@ #include "ice_lib.h" #include "ice_fltr.h" #include "ice_dcb_lib.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" #include "ice_vsi_vlan_ops.h" /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d85736f700dd..eba1c18ea52e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -13,7 +13,7 @@ #include "ice_fltr.h" #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the * ice tracepoint functions. This must be done exactly once across the diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 2429727d5562..3b4ecbf0aa8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -3,7 +3,7 @@ #include "ice.h" #include "ice_eswitch.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" #include "ice_sriov.h" #include "ice_tc_lib.h" #include "ice_dcb_lib.h" -- cgit v1.2.3 From 4ebc5f25d0819f3a89f3664ffdaae104bf4d9483 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 25 Mar 2024 22:34:32 +0100 Subject: ice: move devlink port code to a separate file Keep devlink related code in a separate file. More devlink port code and handlers will be added here for other port operations. Remove no longer needed include of our devlink.h in ice_lib.c. 
Reviewed-by: Przemek Kitszel Reviewed-by: Wojciech Drewek Signed-off-by: Piotr Raczynski Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 1 + drivers/net/ethernet/intel/ice/devlink/devlink.c | 423 -------------------- .../net/ethernet/intel/ice/devlink/devlink_port.c | 430 +++++++++++++++++++++ .../net/ethernet/intel/ice/devlink/devlink_port.h | 12 + drivers/net/ethernet/intel/ice/ice_lib.c | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_repr.c | 1 + 7 files changed, 445 insertions(+), 424 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/devlink/devlink_port.c create mode 100644 drivers/net/ethernet/intel/ice/devlink/devlink_port.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 736b1cae2033..03500e28ac99 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -30,6 +30,7 @@ ice-y := ice_main.o \ ice_flow.o \ ice_idc.o \ devlink/devlink.o \ + devlink/devlink_port.o \ ice_ddp.o \ ice_fw_update.o \ ice_lag.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 82983eab52e0..926a14ac0d6f 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -10,8 +10,6 @@ #include "ice_fw_update.h" #include "ice_dcb_lib.h" -static int ice_active_port_option = -1; - /* context for devlink info version reporting */ struct ice_info_ctx { char buf[128]; @@ -525,251 +523,6 @@ ice_devlink_reload_empr_finish(struct ice_pf *pf, return 0; } -/** - * ice_devlink_port_opt_speed_str - convert speed to a string - * @speed: speed value - */ -static const char *ice_devlink_port_opt_speed_str(u8 speed) -{ - switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { - case ICE_AQC_PORT_OPT_MAX_LANE_100M: - return "0.1"; - case ICE_AQC_PORT_OPT_MAX_LANE_1G: - return "1"; - case ICE_AQC_PORT_OPT_MAX_LANE_2500M: - return "2.5"; - case ICE_AQC_PORT_OPT_MAX_LANE_5G: - return "5"; - case ICE_AQC_PORT_OPT_MAX_LANE_10G: - return "10"; - case ICE_AQC_PORT_OPT_MAX_LANE_25G: - return "25"; - case ICE_AQC_PORT_OPT_MAX_LANE_50G: - return "50"; - case ICE_AQC_PORT_OPT_MAX_LANE_100G: - return "100"; - } - - return "-"; -} - -#define ICE_PORT_OPT_DESC_LEN 50 -/** - * ice_devlink_port_options_print - Print available port split options - * @pf: the PF to print split port options - * - * Prints a table with available port split options and max port speeds - */ -static void ice_devlink_port_options_print(struct ice_pf *pf) -{ - u8 i, j, options_count, cnt, speed, pending_idx, active_idx; - struct ice_aqc_get_port_options_elem *options, *opt; - struct device *dev = ice_pf_to_dev(pf); - bool active_valid, pending_valid; - char desc[ICE_PORT_OPT_DESC_LEN]; - const char *str; - int status; - - options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, - sizeof(*options), GFP_KERNEL); - if (!options) - return; - - for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { - opt = options + i * ICE_AQC_PORT_OPT_MAX; - options_count = ICE_AQC_PORT_OPT_MAX; - active_valid = 0; - - status = ice_aq_get_port_options(&pf->hw, opt, &options_count, - i, true, &active_idx, - &active_valid, &pending_idx, - &pending_valid); - if (status) { - dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", - i, 
status); - goto err; - } - } - - dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); - dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); - dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); - - for (i = 0; i < options_count; i++) { - cnt = 0; - - if (i == ice_active_port_option) - str = "Active"; - else if ((i == pending_idx) && pending_valid) - str = "Pending"; - else - str = ""; - - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%-8s", str); - - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%-6u", options[i].pmd); - - for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { - speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; - str = ice_devlink_port_opt_speed_str(speed); - cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, - "%3s ", str); - } - - dev_dbg(dev, "%s\n", desc); - } - -err: - kfree(options); -} - -/** - * ice_devlink_aq_set_port_option - Send set port option admin queue command - * @pf: the PF to print split port options - * @option_idx: selected port option - * @extack: extended netdev ack structure - * - * Sends set port option admin queue command with selected port option and - * calls NVM write activate. - */ -static int -ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, - struct netlink_ext_ack *extack) -{ - struct device *dev = ice_pf_to_dev(pf); - int status; - - status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); - if (status) { - dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); - return -EIO; - } - - status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - return -EIO; - } - - status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); - if (status) { - dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", - status, pf->hw.adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); - ice_release_nvm(&pf->hw); - return -EIO; - } - - ice_release_nvm(&pf->hw); - - NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); - return 0; -} - -/** - * ice_devlink_port_split - .port_split devlink handler - * @devlink: devlink instance structure - * @port: devlink port structure - * @count: number of ports to split to - * @extack: extended netdev ack structure - * - * Callback for the devlink .port_split operation. - * - * Unfortunately, the devlink expression of available options is limited - * to just a number, so search for an FW port option which supports - * the specified number. As there could be multiple FW port options with - * the same port split count, allow switching between them. When the same - * port split count request is issued again, switch to the next FW port - * option with the same port split count. - * - * Return: zero on success or an error code on failure. 
- */ -static int -ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, - unsigned int count, struct netlink_ext_ack *extack) -{ - struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; - u8 i, j, active_idx, pending_idx, new_option; - struct ice_pf *pf = devlink_priv(devlink); - u8 option_count = ICE_AQC_PORT_OPT_MAX; - struct device *dev = ice_pf_to_dev(pf); - bool active_valid, pending_valid; - int status; - - status = ice_aq_get_port_options(&pf->hw, options, &option_count, - 0, true, &active_idx, &active_valid, - &pending_idx, &pending_valid); - if (status) { - dev_dbg(dev, "Couldn't read port split options, err = %d\n", - status); - NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); - return -EIO; - } - - new_option = ICE_AQC_PORT_OPT_MAX; - active_idx = pending_valid ? pending_idx : active_idx; - for (i = 1; i <= option_count; i++) { - /* In order to allow switching between FW port options with - * the same port split count, search for a new option starting - * from the active/pending option (with array wrap around). - */ - j = (active_idx + i) % option_count; - - if (count == options[j].pmd) { - new_option = j; - break; - } - } - - if (new_option == active_idx) { - dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", - count); - NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); - ice_devlink_port_options_print(pf); - return -EINVAL; - } - - if (new_option == ICE_AQC_PORT_OPT_MAX) { - dev_dbg(dev, "request to split: count: %u not found\n", count); - NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); - ice_devlink_port_options_print(pf); - return -EINVAL; - } - - status = ice_devlink_aq_set_port_option(pf, new_option, extack); - if (status) - return status; - - ice_devlink_port_options_print(pf); - - return 0; -} - -/** - * ice_devlink_port_unsplit - .port_unsplit devlink handler - * @devlink: devlink instance structure - * @port: devlink port structure - * @extack: extended netdev ack structure - * - * Callback for the devlink .port_unsplit operation. - * Calls ice_devlink_port_split with split count set to 1. - * There could be no FW option available with split count 1. - * - * Return: zero on success or an error code on failure. 
- */ -static int -ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, - struct netlink_ext_ack *extack) -{ - return ice_devlink_port_split(devlink, port, 1, extack); -} - /** * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree * @pf: pf struct @@ -1548,23 +1301,6 @@ void ice_devlink_unregister(struct ice_pf *pf) devlink_unregister(priv_to_devlink(pf)); } -/** - * ice_devlink_set_switch_id - Set unique switch id based on pci dsn - * @pf: the PF to create a devlink port for - * @ppid: struct with switch id information - */ -static void -ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid) -{ - struct pci_dev *pdev = pf->pdev; - u64 id; - - id = pci_get_dsn(pdev); - - ppid->id_len = sizeof(id); - put_unaligned_be64(id, &ppid->id); -} - int ice_devlink_register_params(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); @@ -1579,165 +1315,6 @@ void ice_devlink_unregister_params(struct ice_pf *pf) ARRAY_SIZE(ice_devlink_params)); } -/** - * ice_devlink_set_port_split_options - Set port split options - * @pf: the PF to set port split options - * @attrs: devlink attributes - * - * Sets devlink port split options based on available FW port options - */ -static void -ice_devlink_set_port_split_options(struct ice_pf *pf, - struct devlink_port_attrs *attrs) -{ - struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; - u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; - bool active_valid, pending_valid; - int status; - - status = ice_aq_get_port_options(&pf->hw, options, &option_count, - 0, true, &active_idx, &active_valid, - &pending_idx, &pending_valid); - if (status) { - dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", - status); - return; - } - - /* find the biggest available port split count */ - for (i = 0; i < option_count; i++) - attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); - - attrs->splittable = attrs->lanes ? 1 : 0; - ice_active_port_option = active_idx; -} - -static const struct devlink_port_ops ice_devlink_port_ops = { - .port_split = ice_devlink_port_split, - .port_unsplit = ice_devlink_port_unsplit, -}; - -/** - * ice_devlink_create_pf_port - Create a devlink port for this PF - * @pf: the PF to create a devlink port for - * - * Create and register a devlink_port for this PF. - * This function has to be called under devl_lock. - * - * Return: zero on success or an error code on failure. - */ -int ice_devlink_create_pf_port(struct ice_pf *pf) -{ - struct devlink_port_attrs attrs = {}; - struct devlink_port *devlink_port; - struct devlink *devlink; - struct ice_vsi *vsi; - struct device *dev; - int err; - - devlink = priv_to_devlink(pf); - - dev = ice_pf_to_dev(pf); - - devlink_port = &pf->devlink_port; - - vsi = ice_get_main_vsi(pf); - if (!vsi) - return -EIO; - - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; - - /* As FW supports only port split options for whole device, - * set port split options only for first PF. 
- */ - if (pf->hw.pf_id == 0) - ice_devlink_set_port_split_options(pf, &attrs); - - ice_devlink_set_switch_id(pf, &attrs.switch_id); - - devlink_port_attrs_set(devlink_port, &attrs); - - err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, - &ice_devlink_port_ops); - if (err) { - dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", - pf->hw.pf_id, err); - return err; - } - - return 0; -} - -/** - * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF - * @pf: the PF to cleanup - * - * Unregisters the devlink_port structure associated with this PF. - * This function has to be called under devl_lock. - */ -void ice_devlink_destroy_pf_port(struct ice_pf *pf) -{ - devl_port_unregister(&pf->devlink_port); -} - -/** - * ice_devlink_create_vf_port - Create a devlink port for this VF - * @vf: the VF to create a port for - * - * Create and register a devlink_port for this VF. - * - * Return: zero on success or an error code on failure. - */ -int ice_devlink_create_vf_port(struct ice_vf *vf) -{ - struct devlink_port_attrs attrs = {}; - struct devlink_port *devlink_port; - struct devlink *devlink; - struct ice_vsi *vsi; - struct device *dev; - struct ice_pf *pf; - int err; - - pf = vf->pf; - dev = ice_pf_to_dev(pf); - devlink_port = &vf->devlink_port; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - - attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; - attrs.pci_vf.vf = vf->vf_id; - - ice_devlink_set_switch_id(pf, &attrs.switch_id); - - devlink_port_attrs_set(devlink_port, &attrs); - devlink = priv_to_devlink(pf); - - err = devlink_port_register(devlink, devlink_port, vsi->idx); - if (err) { - dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", - vf->vf_id, err); - return err; - } - - return 0; -} - -/** - * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF - * @vf: the VF to cleanup - * - * Unregisters the devlink_port structure associated with this VF. - */ -void ice_devlink_destroy_vf_port(struct ice_vf *vf) -{ - devl_rate_leaf_destroy(&vf->devlink_port); - devlink_port_unregister(&vf->devlink_port); -} - #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) static const struct devlink_region_ops ice_nvm_region_ops; diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c new file mode 100644 index 000000000000..13e6790d3cae --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. 
*/ + +#include + +#include "ice.h" +#include "devlink.h" + +static int ice_active_port_option = -1; + +/** + * ice_devlink_port_opt_speed_str - convert speed to a string + * @speed: speed value + */ +static const char *ice_devlink_port_opt_speed_str(u8 speed) +{ + switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { + case ICE_AQC_PORT_OPT_MAX_LANE_100M: + return "0.1"; + case ICE_AQC_PORT_OPT_MAX_LANE_1G: + return "1"; + case ICE_AQC_PORT_OPT_MAX_LANE_2500M: + return "2.5"; + case ICE_AQC_PORT_OPT_MAX_LANE_5G: + return "5"; + case ICE_AQC_PORT_OPT_MAX_LANE_10G: + return "10"; + case ICE_AQC_PORT_OPT_MAX_LANE_25G: + return "25"; + case ICE_AQC_PORT_OPT_MAX_LANE_50G: + return "50"; + case ICE_AQC_PORT_OPT_MAX_LANE_100G: + return "100"; + } + + return "-"; +} + +#define ICE_PORT_OPT_DESC_LEN 50 +/** + * ice_devlink_port_options_print - Print available port split options + * @pf: the PF to print split port options + * + * Prints a table with available port split options and max port speeds + */ +static void ice_devlink_port_options_print(struct ice_pf *pf) +{ + u8 i, j, options_count, cnt, speed, pending_idx, active_idx; + struct ice_aqc_get_port_options_elem *options, *opt; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + char desc[ICE_PORT_OPT_DESC_LEN]; + const char *str; + int status; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, + sizeof(*options), GFP_KERNEL); + if (!options) + return; + + for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { + opt = options + i * ICE_AQC_PORT_OPT_MAX; + options_count = ICE_AQC_PORT_OPT_MAX; + active_valid = 0; + + status = ice_aq_get_port_options(&pf->hw, opt, &options_count, + i, true, &active_idx, + &active_valid, &pending_idx, + &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", + i, status); + goto err; + } + } + + dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); + dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); + dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); + + for (i = 0; i < options_count; i++) { + cnt = 0; + + if (i == ice_active_port_option) + str = "Active"; + else if ((i == pending_idx) && pending_valid) + str = "Pending"; + else + str = ""; + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-8s", str); + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-6u", options[i].pmd); + + for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { + speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; + str = ice_devlink_port_opt_speed_str(speed); + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%3s ", str); + } + + dev_dbg(dev, "%s\n", desc); + } + +err: + kfree(options); +} + +/** + * ice_devlink_aq_set_port_option - Send set port option admin queue command + * @pf: the PF to print split port options + * @option_idx: selected port option + * @extack: extended netdev ack structure + * + * Sends set port option admin queue command with selected port option and + * calls NVM write activate. 
+ */ +static int +ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + int status; + + status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); + if (status) { + dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); + return -EIO; + } + + status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); + if (status) { + dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); + ice_release_nvm(&pf->hw); + return -EIO; + } + + ice_release_nvm(&pf->hw); + + NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); + return 0; +} + +/** + * ice_devlink_port_split - .port_split devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @count: number of ports to split to + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_split operation. + * + * Unfortunately, the devlink expression of available options is limited + * to just a number, so search for an FW port option which supports + * the specified number. As there could be multiple FW port options with + * the same port split count, allow switching between them. When the same + * port split count request is issued again, switch to the next FW port + * option with the same port split count. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, j, active_idx, pending_idx, new_option; + struct ice_pf *pf = devlink_priv(devlink); + u8 option_count = ICE_AQC_PORT_OPT_MAX; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port split options, err = %d\n", + status); + NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); + return -EIO; + } + + new_option = ICE_AQC_PORT_OPT_MAX; + active_idx = pending_valid ? pending_idx : active_idx; + for (i = 1; i <= option_count; i++) { + /* In order to allow switching between FW port options with + * the same port split count, search for a new option starting + * from the active/pending option (with array wrap around). 
+ */ + j = (active_idx + i) % option_count; + + if (count == options[j].pmd) { + new_option = j; + break; + } + } + + if (new_option == active_idx) { + dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", + count); + NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + if (new_option == ICE_AQC_PORT_OPT_MAX) { + dev_dbg(dev, "request to split: count: %u not found\n", count); + NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + status = ice_devlink_aq_set_port_option(pf, new_option, extack); + if (status) + return status; + + ice_devlink_port_options_print(pf); + + return 0; +} + +/** + * ice_devlink_port_unsplit - .port_unsplit devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_unsplit operation. + * Calls ice_devlink_port_split with split count set to 1. + * There could be no FW option available with split count 1. + * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + return ice_devlink_port_split(devlink, port, 1, extack); +} + +/** + * ice_devlink_set_port_split_options - Set port split options + * @pf: the PF to set port split options + * @attrs: devlink attributes + * + * Sets devlink port split options based on available FW port options + */ +static void +ice_devlink_set_port_split_options(struct ice_pf *pf, + struct devlink_port_attrs *attrs) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", + status); + return; + } + + /* find the biggest available port split count */ + for (i = 0; i < option_count; i++) + attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); + + attrs->splittable = attrs->lanes ? 1 : 0; + ice_active_port_option = active_idx; +} + +static const struct devlink_port_ops ice_devlink_port_ops = { + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, +}; + +/** + * ice_devlink_set_switch_id - Set unique switch id based on pci dsn + * @pf: the PF to create a devlink port for + * @ppid: struct with switch id information + */ +static void +ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid) +{ + struct pci_dev *pdev = pf->pdev; + u64 id; + + id = pci_get_dsn(pdev); + + ppid->id_len = sizeof(id); + put_unaligned_be64(id, &ppid->id); +} + +/** + * ice_devlink_create_pf_port - Create a devlink port for this PF + * @pf: the PF to create a devlink port for + * + * Create and register a devlink_port for this PF. + * This function has to be called under devl_lock. + * + * Return: zero on success or an error code on failure. 
+ */ +int ice_devlink_create_pf_port(struct ice_pf *pf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + int err; + + devlink = priv_to_devlink(pf); + + dev = ice_pf_to_dev(pf); + + devlink_port = &pf->devlink_port; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return -EIO; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pf->hw.bus.func; + + /* As FW supports only port split options for whole device, + * set port split options only for first PF. + */ + if (pf->hw.pf_id == 0) + ice_devlink_set_port_split_options(pf, &attrs); + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + + err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_ops); + if (err) { + dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", + pf->hw.pf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup + * + * Unregisters the devlink_port structure associated with this PF. + * This function has to be called under devl_lock. + */ +void ice_devlink_destroy_pf_port(struct ice_pf *pf) +{ + devl_port_unregister(&pf->devlink_port); +} + +/** + * ice_devlink_create_vf_port - Create a devlink port for this VF + * @vf: the VF to create a port for + * + * Create and register a devlink_port for this VF. + * + * Return: zero on success or an error code on failure. + */ +int ice_devlink_create_vf_port(struct ice_vf *vf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; + struct ice_pf *pf; + int err; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + devlink_port = &vf->devlink_port; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.vf = vf->vf_id; + + ice_devlink_set_switch_id(pf, &attrs.switch_id); + + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + err = devlink_port_register(devlink, devlink_port, vsi->idx); + if (err) { + dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", + vf->vf_id, err); + return err; + } + + return 0; +} + +/** + * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF + * @vf: the VF to cleanup + * + * Unregisters the devlink_port structure associated with this VF. + */ +void ice_devlink_destroy_vf_port(struct ice_vf *vf) +{ + devl_rate_leaf_destroy(&vf->devlink_port); + devlink_port_unregister(&vf->devlink_port); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h new file mode 100644 index 000000000000..9223bcdb6444 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. 
*/ + +#ifndef _DEVLINK_PORT_H_ +#define _DEVLINK_PORT_H_ + +int ice_devlink_create_pf_port(struct ice_pf *pf); +void ice_devlink_destroy_pf_port(struct ice_pf *pf); +int ice_devlink_create_vf_port(struct ice_vf *vf); +void ice_devlink_destroy_vf_port(struct ice_vf *vf); + +#endif /* _DEVLINK_PORT_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 66eeaf345032..d06e7c82c433 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -7,7 +7,6 @@ #include "ice_lib.h" #include "ice_fltr.h" #include "ice_dcb_lib.h" -#include "devlink/devlink.h" #include "ice_vsi_vlan_ops.h" /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eba1c18ea52e..1784382fe1ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -14,6 +14,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "devlink/devlink.h" +#include "devlink/devlink_port.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the * ice tracepoint functions. This must be done exactly once across the diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 3b4ecbf0aa8e..b8009a301e39 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -4,6 +4,7 @@ #include "ice.h" #include "ice_eswitch.h" #include "devlink/devlink.h" +#include "devlink/devlink_port.h" #include "ice_sriov.h" #include "ice_tc_lib.h" #include "ice_dcb_lib.h" -- cgit v1.2.3 From 118c6bde78fe4173da1b86c51de26cdf0f9b9153 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 25 Mar 2024 22:34:33 +0100 Subject: ice: hold devlink lock for whole init/cleanup Simplify the devlink lock code in the driver by taking the lock for the whole init/cleanup path. Instead of calling the devlink functions that take the lock themselves, call the lockless versions.
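Roughly, the resulting tail of ice_probe() looks like this (a condensed sketch of the ice_main.c hunk below; error handling past ice_deinit() trimmed):

| /* sketch only: probe tail with the instance lock held across init */
| devl_lock(priv_to_devlink(pf));
|
| err = ice_load(pf);
| if (err)
| 	goto err_load;
|
| err = ice_init_devlink(pf);	/* devl_register(), devl_params_register(), ... */
| if (err)
| 	goto err_init_devlink;
|
| devl_unlock(priv_to_devlink(pf));
| return 0;
|
| err_init_devlink:
| 	ice_unload(pf);
| err_load:
| 	devl_unlock(priv_to_devlink(pf));
| 	ice_deinit(pf);

so the instance lock is taken once for the whole path and everything underneath uses the lockless devl_*() helpers.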
Suggested-by: Jiri Pirko Reviewed-by: Jesse Brandeburg Reviewed-by: Wojciech Drewek Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 32 ++++++++++++------------ drivers/net/ethernet/intel/ice/ice_main.c | 7 +++--- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 926a14ac0d6f..be9244bb8bbc 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1287,7 +1287,7 @@ void ice_devlink_register(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); - devlink_register(devlink); + devl_register(devlink); } /** @@ -1298,21 +1298,21 @@ void ice_devlink_register(struct ice_pf *pf) */ void ice_devlink_unregister(struct ice_pf *pf) { - devlink_unregister(priv_to_devlink(pf)); + devl_unregister(priv_to_devlink(pf)); } int ice_devlink_register_params(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); - return devlink_params_register(devlink, ice_devlink_params, - ARRAY_SIZE(ice_devlink_params)); + return devl_params_register(devlink, ice_devlink_params, + ARRAY_SIZE(ice_devlink_params)); } void ice_devlink_unregister_params(struct ice_pf *pf) { - devlink_params_unregister(priv_to_devlink(pf), ice_devlink_params, - ARRAY_SIZE(ice_devlink_params)); + devl_params_unregister(priv_to_devlink(pf), ice_devlink_params, + ARRAY_SIZE(ice_devlink_params)); } #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) @@ -1553,8 +1553,8 @@ void ice_devlink_init_regions(struct ice_pf *pf) u64 nvm_size, sram_size; nvm_size = pf->hw.flash.flash_size; - pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1, - nvm_size); + pf->nvm_region = devl_region_create(devlink, &ice_nvm_region_ops, 1, + nvm_size); if (IS_ERR(pf->nvm_region)) { dev_err(dev, "failed to create NVM devlink region, err %ld\n", PTR_ERR(pf->nvm_region)); @@ -1562,17 +1562,17 @@ void ice_devlink_init_regions(struct ice_pf *pf) } sram_size = pf->hw.flash.sr_words * 2u; - pf->sram_region = devlink_region_create(devlink, &ice_sram_region_ops, - 1, sram_size); + pf->sram_region = devl_region_create(devlink, &ice_sram_region_ops, + 1, sram_size); if (IS_ERR(pf->sram_region)) { dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n", PTR_ERR(pf->sram_region)); pf->sram_region = NULL; } - pf->devcaps_region = devlink_region_create(devlink, - &ice_devcaps_region_ops, 10, - ICE_AQ_MAX_BUF_LEN); + pf->devcaps_region = devl_region_create(devlink, + &ice_devcaps_region_ops, 10, + ICE_AQ_MAX_BUF_LEN); if (IS_ERR(pf->devcaps_region)) { dev_err(dev, "failed to create device-caps devlink region, err %ld\n", PTR_ERR(pf->devcaps_region)); @@ -1589,11 +1589,11 @@ void ice_devlink_init_regions(struct ice_pf *pf) void ice_devlink_destroy_regions(struct ice_pf *pf) { if (pf->nvm_region) - devlink_region_destroy(pf->nvm_region); + devl_region_destroy(pf->nvm_region); if (pf->sram_region) - devlink_region_destroy(pf->sram_region); + devl_region_destroy(pf->sram_region); if (pf->devcaps_region) - devlink_region_destroy(pf->devcaps_region); + devl_region_destroy(pf->devcaps_region); } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1784382fe1ff..b68a4ad864b1 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5186,21 +5186,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) devl_lock(priv_to_devlink(pf)); err = ice_load(pf); - devl_unlock(priv_to_devlink(pf)); if (err) goto err_load; err = ice_init_devlink(pf); if (err) goto err_init_devlink; + devl_unlock(priv_to_devlink(pf)); return 0; err_init_devlink: - devl_lock(priv_to_devlink(pf)); ice_unload(pf); - devl_unlock(priv_to_devlink(pf)); err_load: + devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); err_init: ice_adapter_put(pdev); @@ -5298,9 +5297,9 @@ static void ice_remove(struct pci_dev *pdev) if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); + devl_lock(priv_to_devlink(pf)); ice_deinit_devlink(pf); - devl_lock(priv_to_devlink(pf)); ice_unload(pf); devl_unlock(priv_to_devlink(pf)); -- cgit v1.2.3 From d88cabfd9abcd01c7729e5383919357da732ada9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 Mar 2024 19:17:10 -0600 Subject: nfp: Avoid -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end is coming in GCC-14, and we are getting ready to enable it globally. There is currently an object (`tl`), at the beginning of multiple structures, that contains a flexible structure (`struct nfp_dump_tl`), for example: struct nfp_dumpspec_csr { struct nfp_dump_tl tl; ... __be32 register_width; /* in bits */ }; So, in order to avoid ending up with flexible-array members in the middle of multiple other structs, we use the `struct_group_tagged()` helper to separate the flexible array from the rest of the members in the flexible structure: struct nfp_dump_tl { struct_group_tagged(nfp_dump_tl_hdr, hdr, ... the rest of members ); char data[]; }; With the change described above, we now declare objects of the type of the tagged struct, in this case `struct nfp_dump_tl_hdr`, without embedding flexible arrays in the middle of another struct: struct nfp_dumpspec_csr { struct nfp_dump_tl_hdr tl; ... __be32 register_width; /* in bits */ }; Also, use `container_of()` whenever we need to retrieve a pointer to the flexible structure, through which we can access the flexible array if needed. So, with these changes, fix 33 of the following warnings: drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:58:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:64:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:70:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:78:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:87:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c:92:28: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Link: https://github.com/KSPP/linux/issues/202 Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/ZgYWlkxdrrieDYIu@neat Signed-off-by: Jakub Kicinski --- .../net/ethernet/netronome/nfp/nfp_net_debugdump.c | 41 +++++++++++++--------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c index a614df095b08..2dd37557185e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c @@ -34,8 +34,11 @@ enum nfp_dumpspec_type { /* generic type plus length */ struct nfp_dump_tl { - __be32 type; - __be32 length; /* chunk length to follow, aligned to 8 bytes */ + /* New members must be added within the struct_group() macro below. */ + struct_group_tagged(nfp_dump_tl_hdr, hdr, + __be32 type; + __be32 length; /* chunk length to follow, aligned to 8 bytes */ + ); char data[]; }; @@ -55,19 +58,19 @@ struct nfp_dump_common_cpp { /* CSR dumpables */ struct nfp_dumpspec_csr { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; struct nfp_dump_common_cpp cpp; __be32 register_width; /* in bits */ }; struct nfp_dumpspec_rtsym { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; char rtsym[]; }; /* header for register dumpable */ struct nfp_dump_csr { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; struct nfp_dump_common_cpp cpp; __be32 register_width; /* in bits */ __be32 error; /* error code encountered while reading */ @@ -75,7 +78,7 @@ struct nfp_dump_csr { }; struct nfp_dump_rtsym { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; struct nfp_dump_common_cpp cpp; __be32 error; /* error code encountered while reading */ u8 padded_name_length; /* pad so data starts at 8 byte boundary */ @@ -84,12 +87,12 @@ struct nfp_dump_rtsym { }; struct nfp_dump_prolog { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; __be32 dump_level; }; struct nfp_dump_error { - struct nfp_dump_tl tl; + struct nfp_dump_tl_hdr tl; __be32 error; char padding[4]; char spec[]; @@ -449,6 +452,8 @@ static int nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, struct nfp_dump_state *dump) { + struct nfp_dump_tl *spec_csr_tl = + container_of(&spec_csr->tl, struct nfp_dump_tl, hdr); struct nfp_dump_csr *dump_header = dump->p; u32 reg_sz, header_size, total_size; u32 cpp_rd_addr, max_rd_addr; @@ -458,7 +463,7 @@ nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, int err; if (!nfp_csr_spec_valid(spec_csr)) - return nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump); + return nfp_dump_error_tlv(spec_csr_tl, -EINVAL, dump); reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE; header_size = ALIGN8(sizeof(*dump_header)); @@ -466,7 +471,7 @@ nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, ALIGN8(be32_to_cpu(spec_csr->cpp.dump_length)); dest = dump->p + header_size; - err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump); + err = nfp_add_tlv(be32_to_cpu(spec_csr_tl->type), total_size, dump); if (err) return err; @@ -552,6 +557,8 @@ nfp_dump_indirect_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, struct nfp_dump_state *dump) { + struct nfp_dump_tl *spec_csr_tl = + container_of(&spec_csr->tl, struct nfp_dump_tl, hdr); struct nfp_dump_csr *dump_header = dump->p; u32 reg_sz, header_size, total_size; u32 cpp_rd_addr, max_rd_addr; @@ -560,7 +567,7 @@ nfp_dump_indirect_csr_range(struct nfp_pf *pf, int err; if (!nfp_csr_spec_valid(spec_csr)) - return 
nfp_dump_error_tlv(&spec_csr->tl, -EINVAL, dump); + return nfp_dump_error_tlv(spec_csr_tl, -EINVAL, dump); reg_sz = be32_to_cpu(spec_csr->register_width) / BITS_PER_BYTE; header_size = ALIGN8(sizeof(*dump_header)); @@ -569,7 +576,7 @@ nfp_dump_indirect_csr_range(struct nfp_pf *pf, total_size = header_size + ALIGN8(reg_data_length); dest = dump->p + header_size; - err = nfp_add_tlv(be32_to_cpu(spec_csr->tl.type), total_size, dump); + err = nfp_add_tlv(be32_to_cpu(spec_csr_tl->type), total_size, dump); if (err) return err; @@ -597,6 +604,8 @@ static int nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, struct nfp_dump_state *dump) { + struct nfp_dump_tl *spec_tl = + container_of(&spec->tl, struct nfp_dump_tl, hdr); struct nfp_dump_rtsym *dump_header = dump->p; struct nfp_dumpspec_cpp_isl_id cpp_params; struct nfp_rtsym_table *rtbl = pf->rtbl; @@ -607,14 +616,14 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, void *dest; int err; - tl_len = be32_to_cpu(spec->tl.length); + tl_len = be32_to_cpu(spec_tl->length); key_len = strnlen(spec->rtsym, tl_len); if (key_len == tl_len) - return nfp_dump_error_tlv(&spec->tl, -EINVAL, dump); + return nfp_dump_error_tlv(spec_tl, -EINVAL, dump); sym = nfp_rtsym_lookup(rtbl, spec->rtsym); if (!sym) - return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump); + return nfp_dump_error_tlv(spec_tl, -ENOENT, dump); sym_size = nfp_rtsym_size(sym); header_size = @@ -622,7 +631,7 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, total_size = header_size + ALIGN8(sym_size); dest = dump->p + header_size; - err = nfp_add_tlv(be32_to_cpu(spec->tl.type), total_size, dump); + err = nfp_add_tlv(be32_to_cpu(spec_tl->type), total_size, dump); if (err) return err; -- cgit v1.2.3 From d33fe1714a44ff540629b149d8fab4ac6967585c Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sun, 31 Mar 2024 13:34:41 +0800 Subject: net/dpaa2: Avoid explicit cpumask var allocation on stack For CONFIG_CPUMASK_OFFSTACK=y kernel, explicit allocation of cpumask variable on stack is not recommended since it can cause potential stack overflow. Instead, kernel code should always use *cpumask_var API(s) to allocate cpumask var in config-neutral way, leaving allocation strategy to CONFIG_CPUMASK_OFFSTACK. Use *cpumask_var API(s) to address it. Signed-off-by: Dawei Li Link: https://lore.kernel.org/r/20240331053441.1276826-3-dawei.li@shingroup.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 888509cf1f21..40e881829595 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2896,11 +2896,14 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n, static int update_xps(struct dpaa2_eth_priv *priv) { struct net_device *net_dev = priv->net_dev; - struct cpumask xps_mask; - struct dpaa2_eth_fq *fq; int i, num_queues, netdev_queues; + struct dpaa2_eth_fq *fq; + cpumask_var_t xps_mask; int err = 0; + if (!alloc_cpumask_var(&xps_mask, GFP_KERNEL)) + return -ENOMEM; + num_queues = dpaa2_eth_queue_count(priv); netdev_queues = (net_dev->num_tc ? 
: 1) * num_queues; @@ -2910,16 +2913,17 @@ static int update_xps(struct dpaa2_eth_priv *priv) for (i = 0; i < netdev_queues; i++) { fq = &priv->fq[i % num_queues]; - cpumask_clear(&xps_mask); - cpumask_set_cpu(fq->target_cpu, &xps_mask); + cpumask_clear(xps_mask); + cpumask_set_cpu(fq->target_cpu, xps_mask); - err = netif_set_xps_queue(net_dev, &xps_mask, i); + err = netif_set_xps_queue(net_dev, xps_mask, i); if (err) { netdev_warn_once(net_dev, "Error setting XPS queue\n"); break; } } + free_cpumask_var(xps_mask); return err; } -- cgit v1.2.3 From 4c6ce450a8bb4bdf71959fd226414b079f0f0e02 Mon Sep 17 00:00:00 2001 From: Sai Krishna Date: Sun, 31 Mar 2024 18:18:19 +0530 Subject: octeontx2-pf: Reset MAC stats during probe Reset CGX/RPM MAC HW statistics at the time of driver probe() Signed-off-by: Hariprasad Kelam Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 27 ++++++++++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 + .../ethernet/marvell/octeontx2/af/lmac_common.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 17 +++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 3 +++ .../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 29 ++++++++++++++++++++++ .../ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 20 +++++++++++++++ 9 files changed, 100 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index b86f3224f0b7..27935c54b91b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -24,6 +24,8 @@ #define DRV_NAME "Marvell-CGX/RPM" #define DRV_STRING "Marvell CGX/RPM Driver" +#define CGX_RX_STAT_GLOBAL_INDEX 9 + static LIST_HEAD(cgx_list); /* Convert firmware speed encoding to user format(Mbps) */ @@ -701,6 +703,30 @@ u64 cgx_features_get(void *cgxd) return ((struct cgx *)cgxd)->hw_features; } +int cgx_stats_reset(void *cgxd, int lmac_id) +{ + struct cgx *cgx = cgxd; + int stat_id; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + for (stat_id = 0 ; stat_id < CGX_RX_STATS_COUNT; stat_id++) { + if (stat_id >= CGX_RX_STAT_GLOBAL_INDEX) + /* pass lmac as 0 for CGX_CMR_RX_STAT9-12 */ + cgx_write(cgx, 0, + (CGXX_CMRX_RX_STAT0 + (stat_id * 8)), 0); + else + cgx_write(cgx, lmac_id, + (CGXX_CMRX_RX_STAT0 + (stat_id * 8)), 0); + } + + for (stat_id = 0 ; stat_id < CGX_TX_STATS_COUNT; stat_id++) + cgx_write(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (stat_id * 8), 0); + + return 0; +} + static int cgx_set_fec_stats_count(struct cgx_link_user_info *linfo) { if (!linfo->fec) @@ -1788,6 +1814,7 @@ static struct mac_ops cgx_mac_ops = { .pfc_config = cgx_lmac_pfc_config, .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, .mac_reset = cgx_lmac_reset, + .mac_stats_reset = cgx_stats_reset, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 6f7d1dee5830..dc9ace30554a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -141,6 +141,7 @@ int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id); int cgx_lmac_evh_unregister(void *cgxd, int lmac_id); int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat); int 
cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat); +int cgx_stats_reset(void *cgxd, int lmac_id); int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable); int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable); int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 0b4cba03f2e8..9ffc6790c513 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -132,6 +132,7 @@ struct mac_ops { /* FEC stats */ int (*get_fec_stats)(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); + int (*mac_stats_reset)(void *cgxd, int lmac_id); }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 3d801a1a4f70..4a77f6fe2622 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -174,6 +174,7 @@ M(CGX_FEC_STATS, 0x217, cgx_fec_stats, msg_req, cgx_fec_stats_rsp) \ M(CGX_SET_LINK_MODE, 0x218, cgx_set_link_mode, cgx_set_link_mode_req,\ cgx_set_link_mode_rsp) \ M(CGX_GET_PHY_FEC_STATS, 0x219, cgx_get_phy_fec_stats, msg_req, msg_rsp) \ +M(CGX_STATS_RST, 0x21A, cgx_stats_rst, msg_req, msg_rsp) \ M(CGX_FEATURES_GET, 0x21B, cgx_features_get, msg_req, \ cgx_features_info_msg) \ M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 76218f1cb459..1b34cf9c9703 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -38,6 +38,7 @@ static struct mac_ops rpm_mac_ops = { .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, + .mac_stats_reset = rpm_stats_reset, }; static struct mac_ops rpm2_mac_ops = { @@ -70,6 +71,7 @@ static struct mac_ops rpm2_mac_ops = { .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, + .mac_stats_reset = rpm_stats_reset, }; bool is_dev_rpm2(void *rpmd) @@ -443,6 +445,21 @@ int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat) return 0; } +int rpm_stats_reset(void *rpmd, int lmac_id) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL); + cfg |= RPMX_CMD_CLEAR_TX | RPMX_CMD_CLEAR_RX | BIT_ULL(lmac_id); + rpm_write(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL, cfg); + + return 0; +} + u8 rpm_get_lmac_type(void *rpmd, int lmac_id) { rpm_t *rpm = rpmd; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index b79cfbc6f877..34b11deb0f3c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -85,6 +85,8 @@ #define RPMX_MTI_STAT_STATN_CONTROL 0x10018 #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 #define RPMX_RSFEC_RX_CAPTURE BIT_ULL(27) +#define RPMX_CMD_CLEAR_RX BIT_ULL(30) +#define RPMX_CMD_CLEAR_TX BIT_ULL(31) #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 #define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 @@ -134,4 +136,5 @@ int rpm2_get_nr_lmacs(void *rpmd); bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp 
*rsp); int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); +int rpm_stats_reset(void *rpmd, int lmac_id); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 72e060cf6b61..9d2836077d9f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -602,6 +602,35 @@ int rvu_mbox_handler_rpm_stats(struct rvu *rvu, struct msg_req *req, return rvu_lmac_get_stats(rvu, req, (void *)rsp); } +int rvu_mbox_handler_cgx_stats_rst(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + struct rvu_pfvf *parent_pf; + struct mac_ops *mac_ops; + u8 cgx_idx, lmac; + void *cgxd; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return LMAC_AF_ERR_PERM_DENIED; + + parent_pf = &rvu->pf[pf]; + /* To ensure reset cgx stats won't affect VF stats, + * check if it used by only PF interface. + * If not, return + */ + if (parent_pf->cgx_users > 1) { + dev_info(rvu->dev, "CGX busy, could not reset statistics\n"); + return 0; + } + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + mac_ops = get_mac_ops(cgxd); + + return mac_ops->mac_stats_reset(cgxd, lmac); +} + int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu, struct msg_req *req, struct cgx_fec_stats_rsp *rsp) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 7e16a341ec58..c5de3ba33e2f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -961,6 +961,7 @@ void otx2_get_mac_from_af(struct net_device *netdev); void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx); int otx2_config_pause_frm(struct otx2_nic *pfvf); void otx2_setup_segmentation(struct otx2_nic *pfvf); +int otx2_reset_mac_stats(struct otx2_nic *pfvf); /* RVU block related APIs */ int otx2_attach_npa_nix(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 5d8359b54098..9afeefea3b65 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1146,6 +1146,23 @@ static int otx2_cgx_config_linkevents(struct otx2_nic *pf, bool enable) return err; } +int otx2_reset_mac_stats(struct otx2_nic *pfvf) +{ + struct msg_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_stats_rst(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return err; +} + static int otx2_cgx_config_loopback(struct otx2_nic *pf, bool enable) { struct msg_req *msg; @@ -3034,6 +3051,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(pf); + /* reset CGX/RPM MAC stats */ + otx2_reset_mac_stats(pf); + err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); -- cgit v1.2.3 From 4cbc70f6ec5e06f93e617d3f2d7f8dc47c0c9067 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 1 Apr 2024 23:45:26 +0000 Subject: gve: simplify setting decriptor count defaults Combine the gve_set_desc_cnt and gve_set_desc_cnt_dqo into one function which sets the counts after 
checking the queue format. Both the functions in the previous code and the new combined function never return an error so make the new function void and remove the goto on error. Also rename the new function to gve_set_default_desc_cnt to be clearer about its intention. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_adminq.c | 44 ++++++++++------------------ 1 file changed, 15 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index ae12ac38e18b..50affa11a59c 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -745,31 +745,19 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) return gve_adminq_kick_and_wait(priv); } -static int gve_set_desc_cnt(struct gve_priv *priv, - struct gve_device_descriptor *descriptor) +static void gve_set_default_desc_cnt(struct gve_priv *priv, + const struct gve_device_descriptor *descriptor, + const struct gve_device_option_dqo_rda *dev_op_dqo_rda) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - return 0; -} - -static int -gve_set_desc_cnt_dqo(struct gve_priv *priv, - const struct gve_device_descriptor *descriptor, - const struct gve_device_option_dqo_rda *dev_op_dqo_rda) -{ - priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); - priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - - if (priv->queue_format == GVE_DQO_QPL_FORMAT) - return 0; - - priv->options_dqo_rda.tx_comp_ring_entries = - be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->options_dqo_rda.rx_buff_ring_entries = - be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); - return 0; + if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + priv->options_dqo_rda.tx_comp_ring_entries = + be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); + priv->options_dqo_rda.rx_buff_ring_entries = + be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); + } } static void gve_enable_supported_features(struct gve_priv *priv, @@ -888,15 +876,13 @@ int gve_adminq_describe_device(struct gve_priv *priv) dev_info(&priv->pdev->dev, "Driver is running with GQI QPL queue format.\n"); } - if (gve_is_gqi(priv)) { - err = gve_set_desc_cnt(priv, descriptor); - } else { - /* DQO supports LRO. */ + + /* set default descriptor counts */ + gve_set_default_desc_cnt(priv, descriptor, dev_op_dqo_rda); + + /* DQO supports LRO. */ + if (!gve_is_gqi(priv)) priv->dev->hw_features |= NETIF_F_LRO; - err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda); - } - if (err) - goto free_device_descriptor; priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); -- cgit v1.2.3 From 5dee3c702c20b62bc12b72edb495740bf06d97b3 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 1 Apr 2024 23:45:27 +0000 Subject: gve: make the completion and buffer ring size equal for DQO For the DQO queue format, the gve driver stores two ring sizes for both TX and RX - one for completion queue ring and one for data buffer ring. This is supposed to enable asymmetric sizes for these two rings but that is not supported. Make both fields reference the same single variable. 
This change renders reading supported TX completion ring size and RX buffer ring size for DQO from the device useless, so change those fields to reserved and remove related code. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 6 ----- drivers/net/ethernet/google/gve/gve_adminq.c | 40 +++++++--------------------- drivers/net/ethernet/google/gve/gve_adminq.h | 3 +-- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 3 +-- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 4 +-- 5 files changed, 13 insertions(+), 43 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4814c96d5fe7..f009f7b3e68b 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -621,11 +621,6 @@ struct gve_qpl_config { unsigned long *qpl_id_map; /* bitmap of used qpl ids */ }; -struct gve_options_dqo_rda { - u16 tx_comp_ring_entries; /* number of tx_comp descriptors */ - u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ -}; - struct gve_irq_db { __be32 index; } ____cacheline_aligned; @@ -792,7 +787,6 @@ struct gve_priv { u64 link_speed; bool up_before_suspend; /* True if dev was up before suspend */ - struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 50affa11a59c..2ff9327ec056 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -565,6 +565,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be64(tx->q_resources_bus), .tx_ring_addr = cpu_to_be64(tx->bus), .ntfy_id = cpu_to_be32(tx->ntfy_id), + .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt), }; if (gve_is_gqi(priv)) { @@ -573,24 +574,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 comp_ring_size; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - comp_ring_size = - priv->options_dqo_rda.tx_comp_ring_entries; - } else { + else qpl_id = tx->dqo.qpl->id; - comp_ring_size = priv->tx_desc_cnt; - } cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_tx_queue.tx_ring_size = - cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = cpu_to_be64(tx->complq_bus_dqo); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(comp_ring_size); + cpu_to_be16(priv->tx_desc_cnt); } return gve_adminq_issue_cmd(priv, &cmd); @@ -621,6 +615,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) .queue_id = cpu_to_be32(queue_index), .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt), }; if (gve_is_gqi(priv)) { @@ -635,20 +630,13 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); } else { - u16 rx_buff_ring_entries; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = 
GVE_RAW_ADDRESSING_QPL_ID; - rx_buff_ring_entries = - priv->options_dqo_rda.rx_buff_ring_entries; - } else { + else qpl_id = rx->dqo.qpl->id; - rx_buff_ring_entries = priv->rx_desc_cnt; - } cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.rx_ring_size = - cpu_to_be16(priv->rx_desc_cnt); cmd.create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->dqo.complq.bus); cmd.create_rx_queue.rx_data_ring_addr = @@ -656,7 +644,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(priv->data_buffer_size_dqo); cmd.create_rx_queue.rx_buff_ring_size = - cpu_to_be16(rx_buff_ring_entries); + cpu_to_be16(priv->rx_desc_cnt); cmd.create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); if (priv->header_split_enabled) @@ -746,18 +734,10 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) } static void gve_set_default_desc_cnt(struct gve_priv *priv, - const struct gve_device_descriptor *descriptor, - const struct gve_device_option_dqo_rda *dev_op_dqo_rda) + const struct gve_device_descriptor *descriptor) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { - priv->options_dqo_rda.tx_comp_ring_entries = - be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->options_dqo_rda.rx_buff_ring_entries = - be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); - } } static void gve_enable_supported_features(struct gve_priv *priv, @@ -878,7 +858,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) } /* set default descriptor counts */ - gve_set_default_desc_cnt(priv, descriptor, dev_op_dqo_rda); + gve_set_default_desc_cnt(priv, descriptor); /* DQO supports LRO. */ if (!gve_is_gqi(priv)) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5ac972e45ff8..3ff2028a7472 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -103,8 +103,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4); struct gve_device_option_dqo_rda { __be32 supported_features_mask; - __be16 tx_comp_ring_entries; - __be16 rx_buff_ring_entries; + __be32 reserved; }; static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8e8071308aeb..7c2ab1edfcb2 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -305,8 +305,7 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, size_t size; int i; - const u32 buffer_queue_slots = cfg->raw_addressing ? - priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size; + const u32 buffer_queue_slots = cfg->ring_size; const u32 completion_queue_slots = cfg->ring_size; netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n"); diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index bc34b6cd3a3e..70f29b90a982 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -295,9 +295,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, /* Queue sizes must be a power of 2 */ tx->mask = cfg->ring_size - 1; - tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? 
- priv->options_dqo_rda.tx_comp_ring_entries - 1 : - tx->mask; + tx->dqo.complq_mask = tx->mask; /* The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. -- cgit v1.2.3 From b94d3703c1a6a57323256a687f4cbdabfffbe408 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 1 Apr 2024 23:45:28 +0000 Subject: gve: set page count for RX QPL for GQI and DQO queue formats Fulfill the requirement that for GQI, the number of pages per RX QPL is equal to the ring size. Set this value to be equal to ring size. Because of this change, the rx_data_slot_cnt and rx_pages_per_qpl fields stored in the priv structure are not needed, so remove their usage. And for DQO, the number of pages per RX QPL is more than ring size to account for out-of-order completions. So set it to two times of rx ring size. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 11 ++++++++--- drivers/net/ethernet/google/gve/gve_adminq.c | 11 ----------- drivers/net/ethernet/google/gve/gve_main.c | 14 +++++++++----- drivers/net/ethernet/google/gve/gve_rx.c | 2 +- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 4 ++-- 5 files changed, 20 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index f009f7b3e68b..693d4b7d818b 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -63,7 +63,6 @@ #define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 #define DQO_QPL_DEFAULT_TX_PAGES 512 -#define DQO_QPL_DEFAULT_RX_PAGES 2048 /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF @@ -714,8 +713,6 @@ struct gve_priv { u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ - u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */ - u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ struct bpf_prog *xdp_prog; /* XDP BPF program */ @@ -1038,6 +1035,14 @@ static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg) return gve_get_rx_qpl_id(tx_cfg, 0); } +static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) +{ + /* For DQO, page count should be more than ring size for + * out-of-order completions. Set it to two times of ring size. 
+ */ + return 2 * rx_desc_cnt; +} + /* Returns a pointer to the next available tx qpl in the list of qpls */ static inline struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_tx_alloc_rings_cfg *cfg, diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 2ff9327ec056..faeff20cd370 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -764,12 +764,8 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (dev_op_dqo_qpl) { priv->tx_pages_per_qpl = be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); - priv->rx_pages_per_qpl = - be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl); if (priv->tx_pages_per_qpl == 0) priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; - if (priv->rx_pages_per_qpl == 0) - priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; } if (dev_op_buffer_sizes && @@ -878,13 +874,6 @@ int gve_adminq_describe_device(struct gve_priv *priv) mac = descriptor->mac; dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac); priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); - priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl); - - if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) { - dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", - priv->rx_data_slot_cnt); - priv->rx_desc_cnt = priv->rx_data_slot_cnt; - } priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 166bd827a6d7..470447c0490f 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1103,13 +1103,13 @@ free_qpls: return err; } -static int gve_alloc_qpls(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) +static int gve_alloc_qpls(struct gve_priv *priv, struct gve_qpls_alloc_cfg *cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues; int rx_start_id, tx_num_qpls, rx_num_qpls; struct gve_queue_page_list *qpls; - int page_count; + u32 page_count; int err; if (cfg->raw_addressing) @@ -1141,8 +1141,12 @@ static int gve_alloc_qpls(struct gve_priv *priv, /* For GQI_QPL number of pages allocated have 1:1 relationship with * number of descriptors. For DQO, number of pages required are * more than descriptors (because of out of order completions). + * Set it to twice the number of descriptors. */ - page_count = cfg->is_gqi ? 
priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; + if (cfg->is_gqi) + page_count = rx_alloc_cfg->ring_size; + else + page_count = gve_get_rx_pages_per_qpl_dqo(rx_alloc_cfg->ring_size); rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv)); err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls); if (err) @@ -1363,7 +1367,7 @@ static int gve_queues_mem_alloc(struct gve_priv *priv, { int err; - err = gve_alloc_qpls(priv, qpls_alloc_cfg); + err = gve_alloc_qpls(priv, qpls_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n"); return err; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 20f5a9e7fae9..cd727e55ae0f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -240,7 +240,7 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; - u32 slots = priv->rx_data_slot_cnt; + u32 slots = cfg->ring_size; int filled_pages; size_t bytes; int err; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 7c2ab1edfcb2..15108407b54f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -178,7 +178,7 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx, return err; } else { idx = rx->dqo.next_qpl_page_idx; - if (idx >= priv->rx_pages_per_qpl) { + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { net_err_ratelimited("%s: Out of QPL pages\n", priv->dev->name); return -ENOMEM; @@ -321,7 +321,7 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->dqo.num_buf_states = cfg->raw_addressing ? min_t(s16, S16_MAX, buffer_queue_slots * 4) : - priv->rx_pages_per_qpl; + gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL); -- cgit v1.2.3 From ed4fb326947dc486f97c66168f6ac50f5d1efd19 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 1 Apr 2024 23:45:29 +0000 Subject: gve: add support to read ring size ranges from the device Add support to read ring size change capability and the min and max descriptor counts from the device and store it in the driver. Also accommodate a special case where the device does not provide minimum ring size depending on the version of the device. In that case, rely on default values for the minimums. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 10 ++++ drivers/net/ethernet/google/gve/gve_adminq.c | 71 ++++++++++++++++++++++++---- drivers/net/ethernet/google/gve/gve_adminq.h | 45 +++++++++++------- 3 files changed, 102 insertions(+), 24 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 693d4b7d818b..669cacdae4f4 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -50,6 +50,10 @@ /* PTYPEs are always 10 bits. 
*/ #define GVE_NUM_PTYPES 1024 +/* Default minimum ring size */ +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 + #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 #define GVE_MAX_RX_BUFFER_SIZE 4096 @@ -712,6 +716,12 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ + u16 max_tx_desc_cnt; + u16 max_rx_desc_cnt; + u16 min_tx_desc_cnt; + u16 min_rx_desc_cnt; + bool modify_ring_size_enabled; + bool default_min_ring_size; u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index faeff20cd370..b2b619aa2310 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end; } +#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8 + static void gve_parse_device_option(struct gve_priv *priv, struct gve_device_descriptor *device_descriptor, @@ -41,7 +43,8 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, - struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -165,6 +168,27 @@ void gve_parse_device_option(struct gve_priv *priv, "Buffer Sizes"); *dev_op_buffer_sizes = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring"); + } + + *dev_op_modify_ring = (void *)(option + 1); + + /* device has not provided min ring size */ + if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE) + priv->default_min_ring_size = true; + break; default: /* If we don't recognize the option just continue * without doing anything. 
@@ -183,7 +207,8 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, - struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -204,7 +229,8 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl, dev_op_buffer_sizes); + dev_op_dqo_qpl, dev_op_buffer_sizes, + dev_op_modify_ring); dev_opt = next_opt; } @@ -738,6 +764,12 @@ static void gve_set_default_desc_cnt(struct gve_priv *priv, { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); + + /* set default ranges */ + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->min_tx_desc_cnt = priv->tx_desc_cnt; + priv->min_rx_desc_cnt = priv->rx_desc_cnt; } static void gve_enable_supported_features(struct gve_priv *priv, @@ -747,7 +779,9 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_dqo_qpl *dev_op_dqo_qpl, const struct gve_device_option_buffer_sizes - *dev_op_buffer_sizes) + *dev_op_buffer_sizes, + const struct gve_device_option_modify_ring + *dev_op_modify_ring) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -778,12 +812,33 @@ static void gve_enable_supported_features(struct gve_priv *priv, "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", priv->max_rx_buffer_size, priv->header_buf_size); } + + /* Read and store ring size ranges given by device */ + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + priv->modify_ring_size_enabled = true; + + /* max ring size for DQO QPL should not be overwritten because of device limit */ + if (priv->queue_format != GVE_DQO_QPL_FORMAT) { + priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); + priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); + } + if (priv->default_min_ring_size) { + /* If device hasn't provided minimums, use default minimums */ + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + } else { + priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size); + priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size); + } + } } int gve_adminq_describe_device(struct gve_priv *priv) { struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; @@ -815,9 +870,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, - &dev_op_jumbo_frames, - &dev_op_dqo_qpl, - &dev_op_buffer_sizes); + 
&dev_op_jumbo_frames, &dev_op_dqo_qpl, + &dev_op_buffer_sizes, + &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -878,7 +933,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_enable_supported_features(priv, supported_features_mask, dev_op_jumbo_frames, dev_op_dqo_qpl, - dev_op_buffer_sizes); + dev_op_buffer_sizes, dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 3ff2028a7472..beedf2353847 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -133,6 +133,16 @@ struct gve_device_option_buffer_sizes { static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); +struct gve_device_option_modify_ring { + __be32 supported_featured_mask; + __be16 max_rx_ring_size; + __be16 max_tx_ring_size; + __be16 min_rx_ring_size; + __be16 min_tx_ring_size; +}; + +static_assert(sizeof(struct gve_device_option_modify_ring) == 12); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -142,28 +152,31 @@ static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); * the device for read/write and data is copied from/to SKBs. */ enum gve_dev_opt_id { - GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, - GVE_DEV_OPT_ID_GQI_RDA = 0x2, - GVE_DEV_OPT_ID_GQI_QPL = 0x3, - GVE_DEV_OPT_ID_DQO_RDA = 0x4, - GVE_DEV_OPT_ID_DQO_QPL = 0x7, - GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, - GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, + GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, + GVE_DEV_OPT_ID_GQI_RDA = 0x2, + GVE_DEV_OPT_ID_GQI_QPL = 0x3, + GVE_DEV_OPT_ID_DQO_RDA = 0x4, + GVE_DEV_OPT_ID_MODIFY_RING = 0x6, + GVE_DEV_OPT_ID_DQO_QPL = 0x7, + GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, }; enum gve_dev_opt_req_feat_mask { - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, }; enum gve_sup_feature_mask { - GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, - GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, + GVE_SUP_MODIFY_RING_MASK = 1 << 0, + GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 -- cgit v1.2.3 From 834f9458f2fdb48dfb95976934c1594d086a956d Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 1 Apr 2024 23:45:30 +0000 Subject: gve: add support to change ring size via ethtool Allow the user to change ring size via ethtool if supported by the device. The driver relies on the ring size ranges queried from device to validate ring sizes requested by the user. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 8 +++ drivers/net/ethernet/google/gve/gve_ethtool.c | 85 +++++++++++++++++++++++++-- drivers/net/ethernet/google/gve/gve_main.c | 16 ++--- 3 files changed, 95 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 669cacdae4f4..e97633b68e25 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1159,6 +1159,14 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_qpls_alloc_cfg *qpls_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_adjust_config(struct gve_priv *priv, + struct gve_qpls_alloc_cfg *qpls_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index dbe05402d40b..815dead31673 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -490,8 +490,8 @@ static void gve_get_ringparam(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - cmd->rx_max_pending = priv->rx_desc_cnt; - cmd->tx_max_pending = priv->tx_desc_cnt; + cmd->rx_max_pending = priv->max_rx_desc_cnt; + cmd->tx_max_pending = priv->max_tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; @@ -503,20 +503,93 @@ static void gve_get_ringparam(struct net_device *netdev, kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; } +static int gve_adjust_ring_sizes(struct gve_priv *priv, + u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; + struct gve_qpl_config new_qpl_cfg; + int err; + + /* get current queue configuration */ + gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + + /* copy over the new ring_size from ethtool */ + tx_alloc_cfg.ring_size = new_tx_desc_cnt; + rx_alloc_cfg.ring_size = new_rx_desc_cnt; + + /* qpl_cfg is not read-only, it contains a map that gets updated as + * rings are allocated, which is why we cannot use the yet unreleased + * one in priv. 
+ */ + qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; + tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; + rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + } + + /* Set new ring_size for the next up */ + priv->tx_desc_cnt = new_tx_desc_cnt; + priv->rx_desc_cnt = new_rx_desc_cnt; + + return 0; +} + +static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + /* check for valid range */ + if (new_tx_desc_cnt < priv->min_tx_desc_cnt || + new_tx_desc_cnt > priv->max_tx_desc_cnt || + new_rx_desc_cnt < priv->min_rx_desc_cnt || + new_rx_desc_cnt > priv->max_rx_desc_cnt) { + dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n"); + return -EINVAL; + } + + if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) { + dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n"); + return -EINVAL; + } + return 0; +} + static int gve_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *cmd, struct kernel_ethtool_ringparam *kernel_cmd, struct netlink_ext_ack *extack) { struct gve_priv *priv = netdev_priv(netdev); + u16 new_tx_cnt, new_rx_cnt; + int err; + + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + if (err) + return err; - if (priv->tx_desc_cnt != cmd->tx_pending || - priv->rx_desc_cnt != cmd->rx_pending) { - dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n"); + if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt) + return 0; + + if (!priv->modify_ring_size_enabled) { + dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); return -EOPNOTSUPP; } - return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + new_tx_cnt = cmd->tx_pending; + new_rx_cnt = cmd->rx_pending; + + if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt)) + return -EINVAL; + + return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt); } static int gve_user_reset(struct net_device *netdev, u32 *flags) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 470447c0490f..a515e5af843c 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1314,10 +1314,10 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->rx = priv->rx; } -static void gve_get_curr_alloc_cfgs(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_qpls_alloc_cfg *qpls_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg); gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); @@ -1867,10 +1867,10 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int gve_adjust_config(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +int gve_adjust_config(struct gve_priv *priv, + struct gve_qpls_alloc_cfg *qpls_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; -- cgit v1.2.3 From 04af1d64370180f362b48154d42b299be513884f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 2 
Apr 2024 20:55:50 +0200 Subject: net: fman: Remove some unused fields in some structure In "struct muram_info", the 'size' field is unused. In "struct memac_cfg", the 'fixed_link' field is unused. Remove them. Found with cppcheck, unusedStructMember. Signed-off-by: Christophe JAILLET Reviewed-by: Sean Anderson Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/425222d4f6c584e8316ccb7b2ef415a85c96e455.1712084103.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 1 - drivers/net/ethernet/freescale/fman/fman_muram.c | 1 - 2 files changed, 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 758535adc9ff..92b8f4ab26f1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -267,7 +267,6 @@ struct memac_cfg { bool reset_on_init; bool pause_ignore; bool promiscuous_mode_enable; - struct fixed_phy_status *fixed_link; u16 max_frame_length; u16 pause_quanta; u32 tx_ipg_length; diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.c b/drivers/net/ethernet/freescale/fman/fman_muram.c index f557d68e5b76..1ed245a2ee01 100644 --- a/drivers/net/ethernet/freescale/fman/fman_muram.c +++ b/drivers/net/ethernet/freescale/fman/fman_muram.c @@ -12,7 +12,6 @@ struct muram_info { struct gen_pool *pool; void __iomem *vbase; - size_t size; phys_addr_t pbase; }; -- cgit v1.2.3 From e2d515eb8fcdde06da49be981e13f0c7be4f0b16 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:33 +0300 Subject: net/mlx5e: Use ethtool_sprintf/puts() to fill priv flags strings Use ethtool_sprintf/puts() helper functions which handle the common pattern of printing a string into the ethtool strings interface and incrementing the string pointer by ETH_GSTRING_LEN. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cc51ce16df14..b58367909e2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -269,8 +269,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_PRIV_FLAGS: for (i = 0; i < MLX5E_NUM_PFLAGS; i++) - strcpy(data + i * ETH_GSTRING_LEN, - mlx5e_priv_flags[i].name); + ethtool_puts(&data, mlx5e_priv_flags[i].name); break; case ETH_SS_TEST: -- cgit v1.2.3 From 9ac9299d41f6b67c9b99f1522824572e50a292f8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:34 +0300 Subject: net/mlx5e: Use ethtool_sprintf/puts() to fill selftests strings Use ethtool_sprintf/puts() helper functions which handle the common pattern of printing a string into the ethtool strings interface and incrementing the string pointer by ETH_GSTRING_LEN. The int return value in mlx5e_self_test_fill_strings() is not removed as it is still used to return the number of selftests. 
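As an aside (illustrative sketch only, not part of this patch): the open-coded pattern these helpers replace, shown with a hypothetical names[] array. The old form tracks an offset into the flat strings buffer by hand; ethtool_puts() copies the string and advances the cursor by ETH_GSTRING_LEN itself, so only the count still needs to be returned here. ethtool_sprintf() does the same for format strings with arguments, as used for the per-queue stat names in the following patch.
| /* sketch only: 'names' is a hypothetical array of selftest names */
| #include <linux/kernel.h>
| #include <linux/string.h>
| #include <linux/ethtool.h>
|
| static const char * const names[] = { "Link Test", "Loopback Test" };
|
| static int fill_strings_old(u8 *data)
| {
| 	int i, count = 0;
|
| 	/* caller does the indexing into the flat buffer */
| 	for (i = 0; i < ARRAY_SIZE(names); i++)
| 		strcpy(data + (count++) * ETH_GSTRING_LEN, names[i]);
| 	return count;
| }
|
| static int fill_strings_new(u8 *data)
| {
| 	int i;
|
| 	/* helper copies the string and advances 'data' itself */
| 	for (i = 0; i < ARRAY_SIZE(names); i++)
| 		ethtool_puts(&data, names[i]);
| 	return ARRAY_SIZE(names);
| }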
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 08a75654f5f1..5bf8318cc48b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -359,7 +359,7 @@ int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data) if (st.cond_func && st.cond_func(priv)) continue; if (data) - strcpy(data + count * ETH_GSTRING_LEN, st.name); + ethtool_puts(&data, st.name); count++; } return count; -- cgit v1.2.3 From 89b34322d29381c916933f4728be7cfd08271442 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:35 +0300 Subject: net/mlx5e: Use ethtool_sprintf/puts() to fill stats strings Use ethtool_sprintf/puts() helper functions which handle the common pattern of printing a string into the ethtool strings interface and incrementing the string pointer by ETH_GSTRING_LEN. Change the fill_strings callback to accept a **data pointer, and remove the index and return value. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_stats.c | 11 +- .../ethernet/mellanox/mlx5/core/en_accel/ktls.h | 7 +- .../mellanox/mlx5/core/en_accel/ktls_stats.c | 11 +- .../mellanox/mlx5/core/en_accel/macsec_stats.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 179 ++++++++------------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 +- 7 files changed, 85 insertions(+), 146 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index dd36b04e30a0..ecf87383ecb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -78,13 +78,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw) unsigned int i; if (!priv->ipsec) - return idx; + return; for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ipsec_hw_stats_desc[i].format); - - return idx; + ethtool_puts(data, mlx5e_ipsec_hw_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) @@ -115,9 +112,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw) if (priv->ipsec) for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ipsec_sw_stats_desc[i].format); - return idx; + ethtool_puts(data, mlx5e_ipsec_sw_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index adc6d8ea0960..9b96bee194ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -95,7 +95,7 @@ int 
mlx5e_ktls_init(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); int mlx5e_ktls_get_count(struct mlx5e_priv *priv); -int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data); int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); #else @@ -144,10 +144,7 @@ static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } -static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) -{ - return 0; -} +static inline void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) { } static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 7c1c0eb16787..06363f2653e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -58,20 +58,17 @@ int mlx5e_ktls_get_count(struct mlx5e_priv *priv) return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); } -int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) { - unsigned int i, n, idx = 0; + unsigned int i, n; if (!priv->tls) - return 0; + return; n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ktls_sw_stats_desc[i].format); - - return n; + ethtool_puts(data, mlx5e_ktls_sw_stats_desc[i].format); } int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c index 4559ee16a11a..a79e2786be56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -38,16 +38,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw) unsigned int i; if (!priv->macsec) - return idx; + return; if (!mlx5e_is_macsec_device(priv->mdev)) - return idx; + return; for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_macsec_hw_stats_desc[i].format); - - return idx; + ethtool_puts(data, mlx5e_macsec_hw_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 05527418fa64..e41fbf377ae8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -135,9 +135,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep) int i; for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - sw_rep_stats_desc[i].format); - return idx; + ethtool_puts(data, sw_rep_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) @@ -176,11 +174,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) int i; for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + ethtool_puts(data, vport_rep_stats_desc[i].format); for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) - 
strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_rep_loopback_stats_desc[i].format); - return idx; + ethtool_puts(data, vport_rep_loopback_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f3d0898bdbc6..6be0bcc9a3f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -97,10 +97,10 @@ void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) { mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; const unsigned int num_stats_grps = stats_grps_num(priv); - int i, idx = 0; + int i; for (i = 0; i < num_stats_grps; i++) - idx = stats_grps[i]->fill_strings(priv, data, idx); + stats_grps[i]->fill_strings(priv, &data); } /* Concrete NIC Stats */ @@ -257,8 +257,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw) int i; for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); - return idx; + ethtool_puts(data, sw_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) @@ -591,14 +590,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) int i; for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - q_stats_desc[i].format); + ethtool_puts(data, q_stats_desc[i].format); for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - drop_rq_stats_desc[i].format); - - return idx; + ethtool_puts(data, drop_rq_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) @@ -685,18 +680,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) int i; for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vnic_env_stats_steer_desc[i].format); + ethtool_puts(data, vnic_env_stats_steer_desc[i].format); for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vnic_env_stats_dev_oob_desc[i].format); + ethtool_puts(data, vnic_env_stats_dev_oob_desc[i].format); for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vnic_env_stats_drop_desc[i].format); - - return idx; + ethtool_puts(data, vnic_env_stats_drop_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) @@ -798,13 +788,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) int i; for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + ethtool_puts(data, vport_stats_desc[i].format); for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_loopback_stats_desc[i].format); - - return idx; + ethtool_puts(data, vport_loopback_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) @@ -868,8 +855,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3) int i; for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); - return idx; + ethtool_puts(data, pport_802_3_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) @@ -1029,8 +1015,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863) int i; for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); - return idx; + ethtool_puts(data, 
pport_2863_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) @@ -1088,8 +1073,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819) int i; for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); - return idx; + ethtool_puts(data, pport_2819_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) @@ -1215,21 +1199,18 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) struct mlx5_core_dev *mdev = priv->mdev; int i; - strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); + ethtool_puts(data, "link_down_events_phy"); if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return idx; + return; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_phy_statistical_stats_desc[i].format); + ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_phy_statistical_err_lanes_stats_desc[i].format); - - return idx; + ethtool_puts(data, + pport_phy_statistical_err_lanes_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) @@ -1436,9 +1417,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext) if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_eth_ext_stats_desc[i].format); - return idx; + ethtool_puts(data, pport_eth_ext_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) @@ -1516,19 +1495,16 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie) if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); + ethtool_puts(data, pcie_perf_stats_desc[i].format); if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc64[i].format); + ethtool_puts(data, pcie_perf_stats_desc64[i].format); if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stall_stats_desc[i].format); - return idx; + ethtool_puts(data, + pcie_perf_stall_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) @@ -1609,18 +1585,18 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest) int i, prio; if (!MLX5_CAP_GEN(mdev, sbcam_reg)) - return idx; + return; for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_tc_prio_stats_desc[i].format, prio); + ethtool_sprintf(data, + pport_per_tc_prio_stats_desc[i].format, + prio); for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_tc_congest_prio_stats_desc[i].format, prio); + ethtool_sprintf(data, + pport_per_tc_congest_prio_stats_desc[i].format, + prio); } - - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) @@ -1728,19 +1704,17 @@ static int mlx5e_grp_per_prio_traffic_get_num_stats(void) return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; } -static int 
mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, - u8 *data, - int idx) +static void mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 **data) { int i, prio; for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_traffic_stats_desc[i].format, prio); + ethtool_sprintf(data, + pport_per_prio_traffic_stats_desc[i].format, + prio); } - - return idx; } static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, @@ -1816,9 +1790,8 @@ static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) NUM_PPORT_PFC_STALL_COUNTERS(priv); } -static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, - u8 *data, - int idx) +static void mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 **data) { unsigned long pfc_combined; int i, prio; @@ -1829,23 +1802,22 @@ static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, char pfc_string[ETH_GSTRING_LEN]; snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, pfc_string); + ethtool_sprintf(data, + pport_per_prio_pfc_stats_desc[i].format, + pfc_string); } } if (mlx5e_query_global_pause_combined(priv)) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, "global"); + ethtool_sprintf(data, + pport_per_prio_pfc_stats_desc[i].format, + "global"); } } for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_pfc_stall_stats_desc[i].format); - - return idx; + ethtool_puts(data, pport_pfc_stall_stats_desc[i].format); } static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, @@ -1887,9 +1859,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) { - idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); - idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); - return idx; + mlx5e_grp_per_prio_traffic_fill_strings(priv, data); + mlx5e_grp_per_prio_pfc_fill_strings(priv, data); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) @@ -1944,12 +1915,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme) int i; for (i = 0; i < NUM_PME_STATUS_STATS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + ethtool_puts(data, mlx5e_pme_status_desc[i].format); for (i = 0; i < NUM_PME_ERR_STATS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); - - return idx; + ethtool_puts(data, mlx5e_pme_error_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) @@ -1979,7 +1948,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) { - return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN); + mlx5e_ktls_get_strings(priv, data); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) @@ -2264,10 +2233,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) for (qid = 0; qid < max_qos_sqs; qid++) for (i = 0; i < NUM_QOS_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - qos_sq_stats_desc[i].format, qid); - - return idx; + ethtool_sprintf(data, qos_sq_stats_desc[i].format, qid); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) @@ -2312,29 +2278,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) int i, tc; if 
(!priv->tx_ptp_opened && !priv->rx_ptp_opened) - return idx; + return; for (i = 0; i < NUM_PTP_CH_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - "%s", ptp_ch_stats_desc[i].format); + ethtool_puts(data, ptp_ch_stats_desc[i].format); if (priv->tx_ptp_opened) { for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < NUM_PTP_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_sq_stats_desc[i].format, tc); + ethtool_sprintf(data, + ptp_sq_stats_desc[i].format, + tc); for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < NUM_PTP_CQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_cq_stats_desc[i].format, tc); + ethtool_sprintf(data, + ptp_cq_stats_desc[i].format, + tc); } if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX); + ethtool_sprintf(data, ptp_rq_stats_desc[i].format, + MLX5E_PTP_CHANNEL_IX); } - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) @@ -2394,38 +2360,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_CH_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ch_stats_desc[j].format, i); + ethtool_sprintf(data, ch_stats_desc[j].format, i); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - rq_stats_desc[j].format, i); + ethtool_sprintf(data, rq_stats_desc[j].format, i); for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - xskrq_stats_desc[j].format, i); + ethtool_sprintf(data, xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - rq_xdpsq_stats_desc[j].format, i); + ethtool_sprintf(data, rq_xdpsq_stats_desc[j].format, i); } for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - sq_stats_desc[j].format, - i + tc * max_nch); + ethtool_sprintf(data, sq_stats_desc[j].format, + i + tc * max_nch); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - xsksq_stats_desc[j].format, i); + ethtool_sprintf(data, xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - xdpsq_stats_desc[j].format, i); + ethtool_sprintf(data, xdpsq_stats_desc[j].format, i); } - - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 12b3607afecd..0552b56ae4f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,7 +71,7 @@ struct mlx5e_priv; struct mlx5e_stats_grp { u16 update_stats_mask; int (*get_num_stats)(struct mlx5e_priv *priv); - int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + void (*fill_strings)(struct mlx5e_priv *priv, u8 **data); int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); void (*update_stats)(struct mlx5e_priv *priv); }; @@ -87,7 +87,7 @@ typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv) #define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \ - int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx) + void 
MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 **data) #define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) -- cgit v1.2.3 From 27ea84ab35f5980d3b1a71381da8d62f7a4b54be Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:36 +0300 Subject: net/mlx5e: Make stats group fill_stats callbacks consistent with the API The fill_strings() callbacks were changed to accept a **data pointer, and not rely on propagating the index value. Make a similar change to fill_stats() callbacks to keep the API consistent. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_stats.c | 17 +- .../ethernet/mellanox/mlx5/core/en_accel/ktls.h | 7 +- .../mellanox/mlx5/core/en_accel/ktls_stats.c | 15 +- .../mellanox/mlx5/core/en_accel/macsec_stats.c | 13 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 18 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 312 ++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 6 +- 7 files changed, 215 insertions(+), 173 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index ecf87383ecb8..92bf3fa44a3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -89,14 +89,14 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) int i; if (!priv->ipsec) - return idx; + return; mlx5e_accel_ipsec_fs_read_stats(priv, &priv->ipsec->hw_stats); for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->hw_stats, - mlx5e_ipsec_hw_stats_desc, i); - - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->hw_stats, + mlx5e_ipsec_hw_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) @@ -121,9 +121,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) if (priv->ipsec) for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, - mlx5e_ipsec_sw_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR_ATOMIC64( + &priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i)); } MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 9b96bee194ef..c1844128effa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -96,7 +96,7 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); int mlx5e_ktls_get_count(struct mlx5e_priv *priv); void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data); -int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); +void mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 **data); #else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) @@ -146,10 +146,7 @@ static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) { } 
-static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) -{ - return 0; -} +static inline void mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 **data) { } #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 06363f2653e0..7bf79973128b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -71,19 +71,18 @@ void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) ethtool_puts(data, mlx5e_ktls_sw_stats_desc[i].format); } -int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +void mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 **data) { - unsigned int i, n, idx = 0; + unsigned int i, n; if (!priv->tls) - return 0; + return; n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) - data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, - mlx5e_ktls_sw_stats_desc, - i); - - return n; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_ktls_sw_stats_desc, i)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c index a79e2786be56..4bb47d48061d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -53,19 +53,18 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) int i; if (!priv->macsec) - return idx; + return; if (!mlx5e_is_macsec_device(priv->mdev)) - return idx; + return; macsec_fs = priv->mdev->macsec_fs; mlx5_macsec_fs_get_stats_fill(macsec_fs, mlx5_macsec_fs_get_stats(macsec_fs)); for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_macsec_fs_get_stats(macsec_fs), - mlx5e_macsec_hw_stats_desc, - i); - - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + mlx5_macsec_fs_get_stats(macsec_fs), + mlx5e_macsec_hw_stats_desc, i)); } MLX5E_DEFINE_STATS_GRP(macsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e41fbf377ae8..55b7efe21624 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -143,9 +143,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) int i; for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, - sw_rep_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) @@ -184,12 +184,14 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) int i; for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, - vport_rep_stats_desc, i); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, + vport_rep_stats_desc, i)); for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, - vport_rep_loopback_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, + vport_rep_loopback_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 6be0bcc9a3f4..4f372cb2fc9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -41,6 +41,11 @@ #include #endif +void mlx5e_ethtool_put_stat(u64 **data, u64 val) +{ + *(*data)++ = val; +} + static unsigned int stats_grps_num(struct mlx5e_priv *priv) { return !priv->profile->stats_grps_num ? 0 : @@ -90,7 +95,7 @@ void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx) int i; for (i = 0; i < num_stats_grps; i++) - idx = stats_grps[i]->fill_stats(priv, data, idx); + stats_grps[i]->fill_stats(priv, &data); } void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) @@ -265,8 +270,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) int i; for (i = 0; i < NUM_SW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat(data, + MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_stats_desc, i)); } static void mlx5e_stats_grp_sw_update_stats_xdp_red(struct mlx5e_sw_stats *s, @@ -601,12 +607,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) int i; for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++) - data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, - q_stats_desc, i); + mlx5e_ethtool_put_stat(data, + MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i)); for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) - data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, - drop_rq_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + drop_rq_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) @@ -694,18 +701,22 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) int i; for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) - data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, - vnic_env_stats_steer_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_steer_desc, i)); for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) - data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, - vnic_env_stats_dev_oob_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_dev_oob_desc, i)); for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) - data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, - vnic_env_stats_drop_desc, i); - - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_drop_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) @@ -799,14 +810,16 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) int i; for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, - vport_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i)); for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) - data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, - vport_loopback_stats_desc, i); - - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_loopback_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport) @@ -863,9 +876,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) int i; for (i = 
0; i < NUM_PPORT_802_3_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, - pport_802_3_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_BE( + &priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i)); } #define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ @@ -1023,9 +1037,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) int i; for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, - pport_2863_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_BE( + &priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863) @@ -1081,9 +1096,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) int i; for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, - pport_2819_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_BE( + &priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) @@ -1219,24 +1235,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) int i; /* link_down_events_phy has special handling since it is not stored in __be64 format */ - data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, - counter_set.phys_layer_cntrs.link_down_events); + mlx5e_ethtool_put_stat( + data, MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events)); if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return idx; + return; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i)); if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_err_lanes_stats_desc, - i); - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport + .phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) @@ -1426,10 +1447,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, - pport_eth_ext_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext) @@ -1513,22 +1535,27 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE( + &priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i)); if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) - data[idx++] = - 
MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc64, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i)); if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stall_stats_desc, i); - return idx; + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE( + &priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie) @@ -1606,20 +1633,24 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) int i, prio; if (!MLX5_CAP_GEN(mdev, sbcam_reg)) - return idx; + return; for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio], - pport_per_tc_prio_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &pport->per_tc_prio_counters[prio], + pport_per_tc_prio_stats_desc, i)); for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS ; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio], - pport_per_tc_congest_prio_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &pport->per_tc_congest_prio_counters + [prio], + pport_per_tc_congest_prio_stats_desc, + i)); } - - return idx; } static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv) @@ -1717,20 +1748,20 @@ static void mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, } } -static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, - u64 *data, - int idx) +static void mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 **data) { int i, prio; for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_traffic_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport + .per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i)); } - - return idx; } static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { @@ -1820,9 +1851,8 @@ static void mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, ethtool_puts(data, pport_pfc_stall_stats_desc[i].format); } -static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, - u64 *data, - int idx) +static void mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 **data) { unsigned long pfc_combined; int i, prio; @@ -1830,25 +1860,30 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_pfc_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport + .per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i)); } } if (mlx5e_query_global_pause_combined(priv)) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = - MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_per_prio_pfc_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + 
&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i)); } } for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_pfc_stall_stats_desc, i); - - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_BE( + &priv->stats.pport.per_prio_counters[0], + pport_pfc_stall_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) @@ -1865,9 +1900,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) { - idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); - idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); - return idx; + mlx5e_grp_per_prio_traffic_fill_stats(priv, data); + mlx5e_grp_per_prio_pfc_fill_stats(priv, data); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio) @@ -1929,14 +1963,14 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) mlx5_get_pme_stats(priv->mdev, &pme_stats); for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, - mlx5e_pme_status_desc, i); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(pme_stats.status_counters, + mlx5e_pme_status_desc, i)); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, - mlx5e_pme_error_desc, i); - - return idx; + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(pme_stats.error_counters, + mlx5e_pme_error_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } @@ -1953,7 +1987,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) { - return idx + mlx5e_ktls_get_stats(priv, data + idx); + mlx5e_ktls_get_stats(priv, data); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } @@ -2250,10 +2284,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); for (i = 0; i < NUM_QOS_SQ_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i)); } - - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } @@ -2308,33 +2342,35 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) int i, tc; if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) - return idx; + return; for (i = 0; i < NUM_PTP_CH_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, - ptp_ch_stats_desc, i); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, + ptp_ch_stats_desc, i)); if (priv->tx_ptp_opened) { for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < NUM_PTP_SQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], - ptp_sq_stats_desc, i); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i)); for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < NUM_PTP_CQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], - ptp_cq_stats_desc, i); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i)); } if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) - data[idx++] = + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, - ptp_rq_stats_desc, i); + ptp_rq_stats_desc, i)); } - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; } @@ -2393,44 +2429,50 @@ static 
MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_CH_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, - ch_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->ch, + ch_stats_desc, j)); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, - rq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->rq, + rq_stats_desc, j)); for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, - xskrq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->xskrq, + xskrq_stats_desc, j)); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, - rq_xdpsq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->rq_xdpsq, + rq_xdpsq_stats_desc, j)); } for (tc = 0; tc < priv->max_opened_tc; tc++) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], - sq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->sq[tc], + sq_stats_desc, j)); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, - xsksq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->xsksq, + xsksq_stats_desc, j)); for (j = 0; j < NUM_XDPSQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, - xdpsq_stats_desc, j); + mlx5e_ethtool_put_stat( + data, MLX5E_READ_CTR64_CPU( + &priv->channel_stats[i]->xdpsq, + xdpsq_stats_desc, j)); } - - return idx; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 0552b56ae4f4..b71e3fdf92c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -72,10 +72,12 @@ struct mlx5e_stats_grp { u16 update_stats_mask; int (*get_num_stats)(struct mlx5e_priv *priv); void (*fill_strings)(struct mlx5e_priv *priv, u8 **data); - int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*fill_stats)(struct mlx5e_priv *priv, u64 **data); void (*update_stats)(struct mlx5e_priv *priv); }; +void mlx5e_ethtool_put_stat(u64 **data, u64 val); + typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; #define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name @@ -90,7 +92,7 @@ typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; void MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 **data) #define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ - int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) + void MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 **data) #define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp -- cgit v1.2.3 From 19b85f1b37ce1d97158ca2b1d7c1a3b3e640c813 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 2 Apr 2024 16:30:37 +0300 Subject: net/mlx5e: debugfs, Add reset option for command interface stats Resetting stats just before some test/debug case 
allows us to eliminate the impact of previous commands. This is useful in particular for the average latency calculation. The average_write() callback was unreachable, as "average" is a read-only file. Extend, rename, and use it for a newly exposed write-only "reset" file. Signed-off-by: Tariq Toukan Reviewed-by: Saeed Mahameed Reviewed-by: Gal Pressman Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 09652dc89115..36806e813c33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -143,8 +143,8 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count, return simple_read_from_buffer(buf, count, pos, tbuf, ret); } -static ssize_t average_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) +static ssize_t reset_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) { struct mlx5_cmd_stats *stats; @@ -152,6 +152,11 @@ static ssize_t average_write(struct file *filp, const char __user *buf, spin_lock_irq(&stats->lock); stats->sum = 0; stats->n = 0; + stats->failed = 0; + stats->failed_mbox_status = 0; + stats->last_failed_errno = 0; + stats->last_failed_mbox_status = 0; + stats->last_failed_syndrome = 0; spin_unlock_irq(&stats->lock); *pos += count; @@ -159,11 +164,16 @@ static ssize_t average_write(struct file *filp, const char __user *buf, return count; } -static const struct file_operations stats_fops = { +static const struct file_operations reset_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = reset_write, +}; + +static const struct file_operations average_fops = { .owner = THIS_MODULE, .open = simple_open, .read = average_read, - .write = average_write, }; static ssize_t slots_read(struct file *filp, char __user *buf, size_t count, @@ -228,8 +238,10 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) continue; stats->root = debugfs_create_dir(namep, *cmd); + debugfs_create_file("reset", 0200, stats->root, stats, + &reset_fops); debugfs_create_file("average", 0400, stats->root, stats, - &stats_fops); + &average_fops); debugfs_create_u64("n", 0400, stats->root, &stats->n); debugfs_create_u64("failed", 0400, stats->root, &stats->failed); debugfs_create_u64("failed_mbox_status", 0400, stats->root, -- cgit v1.2.3 From 595f41608dbae55268f63f629acc95a1e0f08a65 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 2 Apr 2024 16:30:38 +0300 Subject: net/mlx5e: XDP, Fix an inconsistent comment Starting from commit eb9b9fdcafe2 ("net/mlx5e: Introduce extended version for mlx5e_xmit_data"), sinfo is no longer passed as an argument to mlx5e_xmit_xdp_frame(), so the comment is inconsistent. check_result must be zero when the packet is fragmented.
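To make the convention concrete, below is a standalone sketch (this is not mlx5 driver code; every name in it is illustrative) of the contract the updated comment documents: a caller that has already verified send-queue room passes a non-zero check_result, while a fragmented frame must pass 0 so the room check runs inside the transmit routine with the fragment count taken into account.

/*
 * Illustrative sketch only -- not mlx5 driver code; all names are made up.
 * It models the calling convention: check_result == 0 means "no pre-check
 * was done, perform the send-queue room check here".
 */
#include <stdbool.h>

struct xmit_data {
	bool has_frags;
	int num_frags;
};

static int sq_room_check(int descs_needed)
{
	int available = 8;	/* pretend eight descriptors are free */

	return descs_needed <= available ? descs_needed : -1;
}

static int xmit_frame(const struct xmit_data *xdptxd, int check_result)
{
	/* check_result must be 0 if xdptxd->has_frags is true. */
	if (xdptxd->has_frags && check_result)
		return -1;	/* caller violated the convention */

	if (!check_result)
		check_result = sq_room_check(1 + xdptxd->num_frags);

	return check_result < 0 ? -1 : 0;
}

int main(void)
{
	struct xmit_data linear = { .has_frags = false, .num_frags = 0 };
	struct xmit_data frags = { .has_frags = true, .num_frags = 3 };

	xmit_frame(&linear, 1);	/* caller already checked room */
	xmit_frame(&frags, 0);	/* fragmented: must pass 0 */
	return 0;
}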
Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 82b5ca1be4f3..4610621a340e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -565,7 +565,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, linear = !!(dma_len - inline_hdr_sz); ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz; - /* check_result must be 0 if sinfo is passed. */ + /* check_result must be 0 if xdptxd->has_frags is true. */ if (!check_result) { int stop_room = 1; -- cgit v1.2.3 From 30f8d23814ea4bb0229a78f7f8f36500c2e2d962 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:39 +0300 Subject: net/mlx5: Convert uintX_t to uX In the kernel, the preferred types are uX. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240402133043.56322-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 4 ++-- include/linux/mlx5/device.h | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 84db05fb9389..f5a3ac40f6e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1160,7 +1160,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv); void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo); void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, - uint32_t stringset, uint8_t *data); + u32 stringset, u8 *data); int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index c7d191f66ad1..4f83e3172767 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -73,7 +73,7 @@ void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, - uint32_t flow_tag) + u32 flow_tag) { struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); struct mlx5_flow_destination dest = {}; 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h index a032bff482a6..7e899c716267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h @@ -11,14 +11,14 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs); void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs); struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, - uint32_t flow_tag); + u32 flow_tag); void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule); #else static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; } static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {} static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, struct sock *sk, u32 tirn, - uint32_t flow_tag) + u32 flow_tag) { return ERR_PTR(-EOPNOTSUPP); } static inline void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) {} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index c1844128effa..07a04a142a2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -95,7 +95,7 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); int mlx5e_ktls_get_count(struct mlx5e_priv *priv); -void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data); +void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, u8 **data); void mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 **data); #else @@ -144,7 +144,7 @@ static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) { } +static inline void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, u8 **data) { } static inline void mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 **data) { } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 7bf79973128b..60be2d72eb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -58,7 +58,7 @@ int mlx5e_ktls_get_count(struct mlx5e_priv *priv) return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); } -void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t **data) +void mlx5e_ktls_get_strings(struct mlx5e_priv *priv, u8 **data) { unsigned int i, n; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 55b7efe21624..a74ee698671c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -274,7 +274,7 @@ out: } static void mlx5e_rep_get_strings(struct net_device *dev, - u32 stringset, uint8_t *data) + u32 stringset, u8 *data) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index e7faf7e73ca4..2d95a9b7b44e 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -283,7 +283,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return 0; } -int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, u32 *sw_owner_id) { u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {}; int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 58732f44940f..c38342b9f320 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -205,7 +205,7 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev); void mlx5_cmd_disable(struct mlx5_core_dev *dev); void mlx5_cmd_set_state(struct mlx5_core_dev *dev, enum mlx5_cmdif_state cmdif_state); -int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, u32 *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index f708b029425a..e9f6c7ed7a7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1883,7 +1883,7 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - uint8_t *tag) + u8 *tag) { struct mlx5dr_match_misc5 *misc5 = &value->misc5; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index dd856cde188d..1d49704b9542 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1897,7 +1897,7 @@ void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - uint8_t *tag) + u8 *tag) { struct mlx5dr_match_misc5 *misc5 = &value->misc5; @@ -2129,7 +2129,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, static int dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, - uint8_t *tag) + u8 *tag) { u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; struct mlx5dr_match_misc *misc = &value->misc; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 01275c6e8468..da61be87a12e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -68,7 +68,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((u8 *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ -- cgit v1.2.3 From 8c54c89ad45a36fa5f0a3426b5e2c504e9c338ee Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 2 Apr 2024 16:30:40 +0300 Subject: net/mlx5e: Add support 
for 800Gbps link modes Add support for 800Gbps speed, link modes of 100Gbps per lane. Signed-off-by: Gal Pressman Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240402133043.56322-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b58367909e2c..69f6a6aa7c55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -219,6 +219,13 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_8_800GBASE_CR8_KR8, ext, + ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); } static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, -- cgit v1.2.3 From 137f3d50ad2a0f2e1ebe5181d6b32a5541786b99 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 2 Apr 2024 16:30:41 +0300 Subject: net/mlx5: Support matching on l4_type for ttc_table Replace matching on TCP and UDP protocols with new l4_type field which is parsed by steering for ttc_table. It is enabled by the outer_l4_type or inner_l4_type bits in nic_rx or port_sel flow table capabilities and used only if pcc_ifa2 bit in HCA capabilities is set. Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240402133043.56322-10-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +- .../net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 8 +- .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 254 +++++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h | 2 +- include/linux/mlx5/device.h | 6 + include/linux/mlx5/mlx5_ifc.h | 32 ++- 7 files changed, 244 insertions(+), 67 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 777d311d44ef..8c5b291a171f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -896,8 +896,7 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, int tt; memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, - MLX5_FLOW_NAMESPACE_KERNEL); + ttc_params->ns_type = MLX5_FLOW_NAMESPACE_KERNEL; ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; @@ -920,8 +919,7 @@ void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, int tt; memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, - MLX5_FLOW_NAMESPACE_KERNEL); + ttc_params->ns_type = MLX5_FLOW_NAMESPACE_KERNEL; ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f2f19fa37d91..aeb32cb27182 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -835,8 +835,7 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, - MLX5_FLOW_NAMESPACE_KERNEL); + ttc_params->ns_type = MLX5_FLOW_NAMESPACE_KERNEL; for (tt = 0; tt < MLX5_NUM_TT; tt++) { ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 101b3bb90863..c16b462ddedf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -449,13 +449,11 @@ static void set_tt_map(struct mlx5_lag_port_sel *port_sel, static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev, struct ttc_params *ttc_params) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_flow_table_attr *ft_attr; int tt; - ttc_params->ns = mlx5_get_flow_namespace(dev, - MLX5_FLOW_NAMESPACE_PORT_SEL); + ttc_params->ns_type = MLX5_FLOW_NAMESPACE_PORT_SEL; ft_attr = &ttc_params->ft_attr; ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC; @@ -470,13 +468,11 @@ static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev, static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, struct ttc_params *ttc_params) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct mlx5_flow_table_attr *ft_attr; int tt; - ttc_params->ns = mlx5_get_flow_namespace(dev, - MLX5_FLOW_NAMESPACE_PORT_SEL); + ttc_params->ns_type = MLX5_FLOW_NAMESPACE_PORT_SEL; ft_attr = &ttc_params->ft_attr; ft_attr->level = MLX5_LAG_FT_LEVEL_TTC; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index b78f2ba25c19..9f13cea16446 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -9,21 +9,24 @@ #include "mlx5_core.h" #include "lib/fs_ttc.h" -#define MLX5_TTC_NUM_GROUPS 3 -#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) -#define MLX5_TTC_GROUP2_SIZE BIT(1) -#define MLX5_TTC_GROUP3_SIZE BIT(0) -#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ - MLX5_TTC_GROUP2_SIZE +\ - MLX5_TTC_GROUP3_SIZE) - -#define MLX5_INNER_TTC_NUM_GROUPS 3 -#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) -#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) -#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) -#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ - MLX5_INNER_TTC_GROUP2_SIZE +\ - MLX5_INNER_TTC_GROUP3_SIZE) +#define MLX5_TTC_MAX_NUM_GROUPS 4 +#define MLX5_TTC_GROUP_TCPUDP_SIZE (MLX5_TT_IPV6_UDP + 1) + +struct mlx5_fs_ttc_groups { + bool use_l4_type; + int num_groups; + int group_size[MLX5_TTC_MAX_NUM_GROUPS]; +}; + +static int mlx5_fs_ttc_table_size(const struct mlx5_fs_ttc_groups *groups) +{ + int i, sz = 0; + + for (i = 0; i < groups->num_groups; i++) + sz += groups->group_size[i]; + + return sz; +} /* L3/L4 traffic type classifier */ struct mlx5_ttc_table { @@ -138,6 +141,53 @@ static struct mlx5_etype_proto ttc_tunnel_rules[] = { }; +enum TTC_GROUP_TYPE { + TTC_GROUPS_DEFAULT = 0, + TTC_GROUPS_USE_L4_TYPE = 1, +}; + +static const struct mlx5_fs_ttc_groups ttc_groups[] = { + [TTC_GROUPS_DEFAULT] = { + .num_groups = 3, + .group_size = { + 
BIT(3) + MLX5_NUM_TUNNEL_TT, + BIT(1), + BIT(0), + }, + }, + [TTC_GROUPS_USE_L4_TYPE] = { + .use_l4_type = true, + .num_groups = 4, + .group_size = { + MLX5_TTC_GROUP_TCPUDP_SIZE, + BIT(3) + MLX5_NUM_TUNNEL_TT - MLX5_TTC_GROUP_TCPUDP_SIZE, + BIT(1), + BIT(0), + }, + }, +}; + +static const struct mlx5_fs_ttc_groups inner_ttc_groups[] = { + [TTC_GROUPS_DEFAULT] = { + .num_groups = 3, + .group_size = { + BIT(3), + BIT(1), + BIT(0), + }, + }, + [TTC_GROUPS_USE_L4_TYPE] = { + .use_l4_type = true, + .num_groups = 4, + .group_size = { + MLX5_TTC_GROUP_TCPUDP_SIZE, + BIT(3) - MLX5_TTC_GROUP_TCPUDP_SIZE, + BIT(1), + BIT(0), + }, + }, +}; + u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) { return ttc_tunnel_rules[tt].proto; @@ -188,9 +238,29 @@ static u8 mlx5_etype_to_ipv(u16 ethertype) return 0; } +static void mlx5_fs_ttc_set_match_proto(void *headers_c, void *headers_v, + u8 proto, bool use_l4_type) +{ + int l4_type; + + if (use_l4_type && (proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { + if (proto == IPPROTO_TCP) + l4_type = MLX5_PACKET_L4_TYPE_TCP; + else + l4_type = MLX5_PACKET_L4_TYPE_UDP; + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, l4_type); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, l4_type, l4_type); + } else { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, proto); + } +} + static struct mlx5_flow_handle * mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, u16 etype, u8 proto) + struct mlx5_flow_destination *dest, u16 etype, u8 proto, + bool use_l4_type) { int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(dev, @@ -207,8 +277,13 @@ mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, if (proto) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + mlx5_fs_ttc_set_match_proto(MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + outer_headers), + MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers), + proto, use_l4_type); } ipv = mlx5_etype_to_ipv(etype); @@ -234,7 +309,8 @@ mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, struct ttc_params *params, - struct mlx5_ttc_table *ttc) + struct mlx5_ttc_table *ttc, + bool use_l4_type) { struct mlx5_flow_handle **trules; struct mlx5_ttc_rule *rules; @@ -251,7 +327,8 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, continue; rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, - ttc_rules[tt].proto); + ttc_rules[tt].proto, + use_l4_type); if (IS_ERR(rule->rule)) { err = PTR_ERR(rule->rule); rule->rule = NULL; @@ -273,7 +350,8 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, trules[tt] = mlx5_generate_ttc_rule(dev, ft, ¶ms->tunnel_dests[tt], ttc_tunnel_rules[tt].etype, - ttc_tunnel_rules[tt].proto); + ttc_tunnel_rules[tt].proto, + use_l4_type); if (IS_ERR(trules[tt])) { err = PTR_ERR(trules[tt]); trules[tt] = NULL; @@ -289,7 +367,8 @@ del_rules: } static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, - bool use_ipv) + bool use_ipv, + const struct mlx5_fs_ttc_groups *groups) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int ix = 0; @@ -297,7 +376,7 @@ static int 
mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, int err; u8 *mc; - ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + ttc->g = kcalloc(groups->num_groups, sizeof(*ttc->g), GFP_KERNEL); if (!ttc->g) return -ENOMEM; in = kvzalloc(inlen, GFP_KERNEL); @@ -307,16 +386,31 @@ static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, return -ENOMEM; } - /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); if (use_ipv) MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); else MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + + /* TCP UDP group */ + if (groups->use_l4_type) { + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.l4_type); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += groups->group_size[ttc->num_groups]; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + MLX5_SET(fte_match_param, mc, outer_headers.l4_type, 0); + } + + /* L4 Group */ + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_TTC_GROUP1_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -326,7 +420,7 @@ static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, /* L3 Group */ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_TTC_GROUP2_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -336,7 +430,7 @@ static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, /* Any Group */ memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_TTC_GROUP3_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -358,7 +452,7 @@ static struct mlx5_flow_handle * mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest, - u16 etype, u8 proto) + u16 etype, u8 proto, bool use_l4_type) { MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; @@ -379,8 +473,13 @@ mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, if (proto) { spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + mlx5_fs_ttc_set_match_proto(MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + inner_headers), + MLX5_ADDR_OF(fte_match_param, + spec->match_value, + inner_headers), + proto, use_l4_type); } rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); @@ -395,7 +494,8 @@ mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, struct ttc_params *params, - struct mlx5_ttc_table *ttc) + struct mlx5_ttc_table *ttc, + bool use_l4_type) { struct mlx5_ttc_rule *rules; struct mlx5_flow_table *ft; @@ -413,7 +513,8 @@ static 
int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, - ttc_rules[tt].proto); + ttc_rules[tt].proto, + use_l4_type); if (IS_ERR(rule->rule)) { err = PTR_ERR(rule->rule); rule->rule = NULL; @@ -430,7 +531,8 @@ del_rules: return err; } -static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc, + const struct mlx5_fs_ttc_groups *groups) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int ix = 0; @@ -438,8 +540,7 @@ static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) int err; u8 *mc; - ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), - GFP_KERNEL); + ttc->g = kcalloc(groups->num_groups, sizeof(*ttc->g), GFP_KERNEL); if (!ttc->g) return -ENOMEM; in = kvzalloc(inlen, GFP_KERNEL); @@ -449,13 +550,28 @@ static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) return -ENOMEM; } - /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + + /* TCP UDP group */ + if (groups->use_l4_type) { + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.l4_type); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += groups->group_size[ttc->num_groups]; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + MLX5_SET(fte_match_param, mc, inner_headers.l4_type, 0); + } + + /* L4 Group */ + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_INNER_TTC_GROUP1_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -465,7 +581,7 @@ static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) /* L3 Group */ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_INNER_TTC_GROUP2_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -475,7 +591,7 @@ static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) /* Any Group */ memset(in, 0, inlen); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5_INNER_TTC_GROUP3_SIZE; + ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); if (IS_ERR(ttc->g[ttc->num_groups])) @@ -496,27 +612,47 @@ err: struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, struct ttc_params *params) { + const struct mlx5_fs_ttc_groups *groups; + struct mlx5_flow_namespace *ns; struct mlx5_ttc_table *ttc; + bool use_l4_type; int err; ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); if (!ttc) return ERR_PTR(-ENOMEM); + switch (params->ns_type) { + case MLX5_FLOW_NAMESPACE_PORT_SEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && + MLX5_CAP_PORT_SELECTION_FT_FIELD_SUPPORT_2(dev, inner_l4_type); + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && + 
MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(dev, inner_l4_type); + break; + default: + return ERR_PTR(-EINVAL); + } + + ns = mlx5_get_flow_namespace(dev, params->ns_type); + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : + &inner_ttc_groups[TTC_GROUPS_DEFAULT]; + WARN_ON_ONCE(params->ft_attr.max_fte); - params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE; - ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + params->ft_attr.max_fte = mlx5_fs_ttc_table_size(groups); + ttc->t = mlx5_create_flow_table(ns, ¶ms->ft_attr); if (IS_ERR(ttc->t)) { err = PTR_ERR(ttc->t); kvfree(ttc); return ERR_PTR(err); } - err = mlx5_create_inner_ttc_table_groups(ttc); + err = mlx5_create_inner_ttc_table_groups(ttc, groups); if (err) goto destroy_ft; - err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc, use_l4_type); if (err) goto destroy_ft; @@ -549,27 +685,47 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_field_support.outer_ip_version); + const struct mlx5_fs_ttc_groups *groups; + struct mlx5_flow_namespace *ns; struct mlx5_ttc_table *ttc; + bool use_l4_type; int err; ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); if (!ttc) return ERR_PTR(-ENOMEM); + switch (params->ns_type) { + case MLX5_FLOW_NAMESPACE_PORT_SEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && + MLX5_CAP_PORT_SELECTION_FT_FIELD_SUPPORT_2(dev, outer_l4_type); + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && + MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(dev, outer_l4_type); + break; + default: + return ERR_PTR(-EINVAL); + } + + ns = mlx5_get_flow_namespace(dev, params->ns_type); + groups = use_l4_type ? 
&ttc_groups[TTC_GROUPS_USE_L4_TYPE] : + &ttc_groups[TTC_GROUPS_DEFAULT]; + WARN_ON_ONCE(params->ft_attr.max_fte); - params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; - ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + params->ft_attr.max_fte = mlx5_fs_ttc_table_size(groups); + ttc->t = mlx5_create_flow_table(ns, ¶ms->ft_attr); if (IS_ERR(ttc->t)) { err = PTR_ERR(ttc->t); kvfree(ttc); return ERR_PTR(err); } - err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer, groups); if (err) goto destroy_ft; - err = mlx5_generate_ttc_table_rules(dev, params, ttc); + err = mlx5_generate_ttc_table_rules(dev, params, ttc, use_l4_type); if (err) goto destroy_ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index 85fef0cd1c07..92eea6bea310 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -40,7 +40,7 @@ struct mlx5_ttc_rule { struct mlx5_ttc_table; struct ttc_params { - struct mlx5_flow_namespace *ns; + enum mlx5_flow_namespace_type ns_type; struct mlx5_flow_table_attr ft_attr; struct mlx5_flow_destination dests[MLX5_NUM_TT]; DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index da61be87a12e..d7bb31d9a446 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1336,6 +1336,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) +#define MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, ft_field_support_2_nic_receive.cap) + #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) @@ -1359,6 +1362,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) +#define MLX5_CAP_PORT_SELECTION_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_PORT_SELECTION(mdev, ft_field_support_2_port_selection.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c940b329a475..40f6fa138e27 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { /* Table 2170 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_flow_table_fields_supported_2_bits { - u8 reserved_at_0[0xe]; + u8 reserved_at_0[0x2]; + u8 inner_l4_type[0x1]; + u8 outer_l4_type[0x1]; + u8 reserved_at_4[0xa]; u8 bth_opcode[0x1]; u8 reserved_at_f[0x1]; u8 tunnel_header_0_1[0x1]; @@ -525,6 +528,12 @@ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { u8 reserved_at_0[0x80]; }; +enum { + MLX5_PACKET_L4_TYPE_NONE, + MLX5_PACKET_L4_TYPE_TCP, + MLX5_PACKET_L4_TYPE_UDP, +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -550,7 +559,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_at_c0[0x10]; + u8 l4_type[0x2]; + u8 reserved_at_c2[0xe]; u8 ipv4_ihl[0x4]; u8 reserved_at_c4[0x4]; @@ -846,7 +856,11 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; - u8 reserved_at_e00[0x700]; + u8 reserved_at_e00[0x600]; + + struct 
mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive; + + u8 reserved_at_1480[0x80]; struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma; @@ -876,7 +890,9 @@ struct mlx5_ifc_port_selection_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_port_selection; - u8 reserved_at_400[0x7c00]; + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_port_selection; + + u8 reserved_at_480[0x7b80]; }; enum { @@ -2004,7 +2020,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_3a0[0x10]; u8 max_rqt_vhca_id[0x10]; - u8 reserved_at_3c0[0x440]; + u8 reserved_at_3c0[0x20]; + + u8 reserved_at_3e0[0x10]; + u8 pcc_ifa2[0x1]; + u8 reserved_at_3f1[0xf]; + + u8 reserved_at_400[0x400]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From c788d79cfa6b68b1068b25e9cdff25d6035ed191 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 2 Apr 2024 16:30:42 +0300 Subject: net/mlx5: Skip pages EQ creation for non-page supplier function Page events are not issued by the device for a function if page_request_disable is set, so there is no need to create the pages EQ. Signed-off-by: Jianbo Liu Reviewed-by: Parav Pandit Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240402133043.56322-11-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9 ++++++++- include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 40a6cb052a2d..5693986ae656 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -688,6 +688,12 @@ static int create_async_eqs(struct mlx5_core_dev *dev) if (err) goto err2; + /* Skip page eq creation when the device does not request for page requests */ + if (MLX5_CAP_GEN(dev, page_request_disable)) { + mlx5_core_dbg(dev, "Skip page EQ creation\n"); + return 0; + } + param = (struct mlx5_eq_param) { .irq = table->ctrl_irq, .nent = /* TODO: sriov max_vf + */ 1, @@ -716,7 +722,8 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - cleanup_async_eq(dev, &table->pages_eq, "pages"); + if (!MLX5_CAP_GEN(dev, page_request_disable)) + cleanup_async_eq(dev, &table->pages_eq, "pages"); cleanup_async_eq(dev, &table->async_eq, "async"); mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); mlx5_cmd_use_polling(dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 40f6fa138e27..cc159d8563d1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1485,7 +1485,9 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x6]; + u8 page_request_disable[0x1]; + u8 reserved_at_7[0x9]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; -- cgit v1.2.3 From 07e1bc785a91e1cb621ac6a098acb222fb0811cf Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 2 Apr 2024 16:30:43 +0300 Subject: net/mlx5: Don't call give_pages() if request 0 page Firmware returns 0 when BOOT/INIT PAGES are queried for non-page-supplier functions (external host PF/VF/SF), so no pages need to be allocated for them.
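To make the change concrete, below is a standalone sketch (not mlx5 driver code; every name in it is illustrative) of the zero-page guard this patch adds: when the startup-page query reports that zero pages are required, the allocation path is skipped instead of issuing a page-supply request for nothing.

/*
 * Illustrative sketch only -- not mlx5 driver code; all names are made up.
 */
#include <stdio.h>

/* Pretend firmware query: a non-page-supplier function reports 0 pages. */
static int query_boot_pages(int is_page_supplier)
{
	return is_page_supplier ? 4096 : 0;
}

static int give_pages(int npages)
{
	printf("allocating and posting %d pages to firmware\n", npages);
	return 0;
}

static int satisfy_startup_pages(int is_page_supplier)
{
	int npages = query_boot_pages(is_page_supplier);

	if (!npages)
		return 0;	/* nothing requested, skip give_pages() */

	return give_pages(npages);
}

int main(void)
{
	satisfy_startup_pages(0);	/* external host function: no-op */
	satisfy_startup_pages(1);	/* page supplier: posts pages */
	return 0;
}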
Signed-off-by: Jianbo Liu Reviewed-by: Parav Pandit Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240402133043.56322-12-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index dcf58efac159..d894a88fa9f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -660,6 +660,9 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); + if (!npages) + return 0; + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } -- cgit v1.2.3 From d38718a525a3017586daebc3cda22c25cd04d14c Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:14 +0200 Subject: mlxsw: pci: Move mlxsw_pci_eq_{init, fini}() Move mlxsw_pci_eq_{init, fini}() after mlxsw_pci_eq_tasklet() as a next patch will setup the tasklet as part of initialization. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/7ae120a02e1c490084daae7e684a0d40b7cce4e7.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 72 +++++++++++++++---------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index af99bf17eb36..efb9472a2069 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -765,42 +765,6 @@ static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) MLXSW_PCI_CQE01_SIZE; } -static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, - struct mlxsw_pci_queue *q) -{ - int i; - int err; - - q->consumer_counter = 0; - - for (i = 0; i < q->count; i++) { - char *elem = mlxsw_pci_queue_elem_get(q, i); - - mlxsw_pci_eqe_owner_set(elem, 1); - } - - mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ - mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ - mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); - for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { - dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); - - mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr); - } - err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); - if (err) - return err; - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - return 0; -} - -static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, - struct mlxsw_pci_queue *q) -{ - mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); -} - static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) { mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); @@ -877,6 +841,42 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) } } +static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_eqe_owner_set(elem, 1); + } + + mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ + 
mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ + mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); +} + struct mlxsw_pci_queue_ops { const char *name; enum mlxsw_pci_queue_type type; -- cgit v1.2.3 From f46de9f0e70c9e518a995194aea15ecaedffc0fa Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:15 +0200 Subject: mlxsw: pci: Move mlxsw_pci_cq_{init, fini}() Move mlxsw_pci_cq_{init, fini}() after mlxsw_pci_cq_tasklet() as a next patch will setup the tasklet as part of initialization. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/25196cb5baf5acf6ec1e956203790e018ba8e306.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 86 +++++++++++++++---------------- 1 file changed, 43 insertions(+), 43 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index efb9472a2069..3fec3909d3c7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -470,49 +470,6 @@ static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, q->u.cq.v = MLXSW_PCI_CQE_V1; } -static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, - struct mlxsw_pci_queue *q) -{ - int i; - int err; - - q->consumer_counter = 0; - - for (i = 0; i < q->count; i++) { - char *elem = mlxsw_pci_queue_elem_get(q, i); - - mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); - } - - if (q->u.cq.v == MLXSW_PCI_CQE_V1) - mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, - MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); - else if (q->u.cq.v == MLXSW_PCI_CQE_V2) - mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, - MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); - - mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); - mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); - mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); - for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { - dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); - - mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr); - } - err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); - if (err) - return err; - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - return 0; -} - -static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, - struct mlxsw_pci_queue *q) -{ - mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); -} - static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci, ptrdiff_t off) { @@ -753,6 +710,49 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); } +static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_pci_queue *q) +{ + int i; + int err; + + q->consumer_counter = 0; + + for (i = 0; i < q->count; i++) { + char *elem = mlxsw_pci_queue_elem_get(q, i); + + mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); + } + + 
if (q->u.cq.v == MLXSW_PCI_CQE_V1) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); + else if (q->u.cq.v == MLXSW_PCI_CQE_V2) + mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, + MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); + + mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); + mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); + mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); + for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { + dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); + + mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr); + } + err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); + if (err) + return err; + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + return 0; +} + +static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, + struct mlxsw_pci_queue *q) +{ + mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); +} + static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) { return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : -- cgit v1.2.3 From fb29028ae7181fbe268d94f6b69d172da4e3640f Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:16 +0200 Subject: mlxsw: pci: Do not setup tasklet from operation Currently, the structure 'mlxsw_pci_queue_ops' holds a pointer to the callback function of tasklet. This is used only for EQ and CQ. mlxsw driver will use NAPI in a following patch set, so CQ will not use tasklet anymore. As preparation, remove this pointer from the shared operation structure and setup the tasklet as part of queue initialization. For now, setup tasklet for EQ and CQ. Later, CQ code will be changed. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/a326cae5fc1ad085a1a063c004983de6fe389414.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 3fec3909d3c7..cd871dbfeb02 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -742,6 +742,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); if (err) return err; + tasklet_setup(&q->tasklet, mlxsw_pci_cq_tasklet); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); return 0; @@ -866,6 +867,7 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); if (err) return err; + tasklet_setup(&q->tasklet, mlxsw_pci_eq_tasklet); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); return 0; @@ -886,7 +888,6 @@ struct mlxsw_pci_queue_ops { struct mlxsw_pci_queue *q); void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); - void (*tasklet)(struct tasklet_struct *t); u16 (*elem_count_f)(const struct mlxsw_pci_queue *q); u8 (*elem_size_f)(const struct mlxsw_pci_queue *q); u16 elem_count; @@ -914,7 +915,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { .pre_init = mlxsw_pci_cq_pre_init, .init = mlxsw_pci_cq_init, .fini = mlxsw_pci_cq_fini, - .tasklet = mlxsw_pci_cq_tasklet, .elem_count_f = mlxsw_pci_cq_elem_count, 
.elem_size_f = mlxsw_pci_cq_elem_size }; @@ -923,7 +923,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_EQ, .init = mlxsw_pci_eq_init, .fini = mlxsw_pci_eq_fini, - .tasklet = mlxsw_pci_eq_tasklet, .elem_count = MLXSW_PCI_EQE_COUNT, .elem_size = MLXSW_PCI_EQE_SIZE }; @@ -948,9 +947,6 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, q->type = q_ops->type; q->pci = mlxsw_pci; - if (q_ops->tasklet) - tasklet_setup(&q->tasklet, q_ops->tasklet); - mem_item->size = MLXSW_PCI_AQ_SIZE; mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev, mem_item->size, &mem_item->mapaddr, -- cgit v1.2.3 From 38b124cb4ee5d3567cd138af33bbd6fefbfdb703 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:17 +0200 Subject: mlxsw: pci: Arm CQ doorbell regardless of number of completions Currently, as part of mlxsw_pci_cq_tasklet(), we check if any item was handled, and only in such case we arm doorbell. This is unlikely case, as we schedule tasklet only for CQs that we get an event for them, which means that they contain completions to handle. Remove this check, which is supposed to be true always, and even if it is false, it is not a mistake to ring the doorbell. We can warn on such case, but it is not really worth to add a check which will be run for each CQ handling when we do not expect to reach it and it does not point to logic error that should be handled. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/f8efa481bfe7bebb9f93bb803f44ab7da77f53e6.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index cd871dbfeb02..799445111144 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -706,8 +706,8 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) if (++items == credits) break; } - if (items) - mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); } static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, -- cgit v1.2.3 From 57beea8e5667241638c62568ce9a2cb7391cfd4a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:18 +0200 Subject: mlxsw: pci: Remove unused counters The structure 'mlxsw_pci_queue' stores several counters which were consumed via debugfs. Since commit 9a32562becd9 ("mlxsw: Remove debugfs interface"), these counters are not used. Remove them. This makes the 'union u' and 'struct eq' redundant. Maintain 'struct cq' as it will be extended later. Replace increasing 'q->u.eq.ev_other_count' with WARN_ON_ONCE(), as it is used in an unreasonable case of receiving event in EQ which is not EQ0 or EQ1. When the queues are initialized, we check number of event queues and fail with the print "Unsupported number of queues" in case that the driver tries to initialize more than two queues. 
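[Illustrative sketch, not mlxsw code] A small standalone model of the counter-to-warning conversion described above: the unreachable "other events" counter becomes a one-shot warning in the default case. The WARN_ON_ONCE() equivalent is emulated with a static flag; queue numbers and names are made up.

#include <stdbool.h>
#include <stdio.h>

#define EQ_ASYNC_NUM 0
#define EQ_COMP_NUM  1

static void warn_once(const char *msg)
{
	static bool warned;

	if (!warned) {		/* fire at most once, like WARN_ON_ONCE() */
		warned = true;
		fprintf(stderr, "WARNING (once): %s\n", msg);
	}
}

static void handle_eqe(int q_num)
{
	switch (q_num) {
	case EQ_ASYNC_NUM:
		/* command interface completion */
		break;
	case EQ_COMP_NUM:
		/* RDQ/SDQ completion */
		break;
	default:
		warn_once("EQE on unexpected event queue");
		break;
	}
}

int main(void)
{
	handle_eqe(EQ_COMP_NUM);
	handle_eqe(7);	/* triggers the one-shot warning */
	handle_eqe(7);	/* silent: already warned */
	return 0;
}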
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ee9e658800aa0390e08342100bc27daff4c176c0.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 49 ++++++++++++------------------- 1 file changed, 18 insertions(+), 31 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 799445111144..ec18fe0c2117 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -80,18 +80,9 @@ struct mlxsw_pci_queue { enum mlxsw_pci_queue_type type; struct tasklet_struct tasklet; /* queue processing tasklet */ struct mlxsw_pci *pci; - union { - struct { - u32 comp_sdq_count; - u32 comp_rdq_count; - enum mlxsw_pci_cqe_v v; - } cq; - struct { - u32 ev_cmd_count; - u32 ev_comp_count; - u32 ev_other_count; - } eq; - } u; + struct { + enum mlxsw_pci_cqe_v v; + } cq; }; struct mlxsw_pci_queue_type_group { @@ -462,12 +453,12 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q) { - q->u.cq.v = mlxsw_pci->max_cqe_ver; + q->cq.v = mlxsw_pci->max_cqe_ver; - if (q->u.cq.v == MLXSW_PCI_CQE_V2 && + if (q->cq.v == MLXSW_PCI_CQE_V2 && q->num < mlxsw_pci->num_sdq_cqs && !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core)) - q->u.cq.v = MLXSW_PCI_CQE_V1; + q->cq.v = MLXSW_PCI_CQE_V1; } static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci, @@ -663,7 +654,7 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); elem = elem_info->elem; - owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem); + owner_bit = mlxsw_pci_cqe_owner_get(q->cq.v, elem); if (mlxsw_pci_elem_hw_owned(q, owner_bit)) return NULL; q->consumer_counter++; @@ -681,8 +672,8 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); - u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); - u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; memcpy(ncqe, cqe, q->elem_size); @@ -693,15 +684,13 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, q->u.cq.v, ncqe); - q->u.cq.comp_sdq_count++; + wqe_counter, q->cq.v, ncqe); } else { struct mlxsw_pci_queue *rdq; rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->u.cq.v, ncqe); - q->u.cq.comp_rdq_count++; + wqe_counter, q->cq.v, ncqe); } if (++items == credits) break; @@ -721,13 +710,13 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, for (i = 0; i < q->count; i++) { char *elem = mlxsw_pci_queue_elem_get(q, i); - mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); + mlxsw_pci_cqe_owner_set(q->cq.v, elem, 1); } - if (q->u.cq.v == MLXSW_PCI_CQE_V1) + if (q->cq.v == MLXSW_PCI_CQE_V1) mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); - else if (q->u.cq.v == MLXSW_PCI_CQE_V2) + else if (q->cq.v == MLXSW_PCI_CQE_V2) mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); @@ -756,13 +745,13 @@ static void mlxsw_pci_cq_fini(struct 
mlxsw_pci *mlxsw_pci, static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) { - return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : - MLXSW_PCI_CQE01_COUNT; + return q->cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : + MLXSW_PCI_CQE01_COUNT; } static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) { - return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE : + return q->cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE : MLXSW_PCI_CQE01_SIZE; } @@ -815,16 +804,14 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) switch (q->num) { case MLXSW_PCI_EQ_ASYNC_NUM: mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); - q->u.eq.ev_cmd_count++; break; case MLXSW_PCI_EQ_COMP_NUM: cqn = mlxsw_pci_eqe_cqn_get(eqe); set_bit(cqn, active_cqns); cq_handle = true; - q->u.eq.ev_comp_count++; break; default: - q->u.eq.ev_other_count++; + WARN_ON_ONCE(1); } if (++items == credits) break; -- cgit v1.2.3 From 29ad2a990648c5e6ec93101cb3719dc820d363e9 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:19 +0200 Subject: mlxsw: pci: Make style changes in mlxsw_pci_eq_tasklet() This function will be used later only for EQ1. As preparation, reorder variables to reverse xmas tree and return earlier when it is possible, to simplify the code. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/2412d6c135b2a6aedb4484f5d8baab3aecd7b9ae.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index ec18fe0c2117..5688c14f7426 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -783,15 +783,14 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) { + unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; - u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci); - unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; - char *eqe; - u8 cqn; + int credits = q->count >> 1; bool cq_handle = false; + u8 cqn, cq_count; int items = 0; - int credits = q->count >> 1; + char *eqe; memset(&active_cqns, 0, sizeof(active_cqns)); @@ -816,13 +815,17 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) if (++items == credits) break; } - if (items) { - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - } + + if (!items) + return; + + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); if (!cq_handle) return; + + cq_count = mlxsw_pci_cq_count(mlxsw_pci); for_each_set_bit(cqn, active_cqns, cq_count) { q = mlxsw_pci_cq_get(mlxsw_pci, cqn); mlxsw_pci_queue_tasklet_schedule(q); -- cgit v1.2.3 From d4b3930b19f7339c3fb98fc3ed71388b0180e7a7 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:20 +0200 Subject: mlxsw: pci: Poll command interface for each cmd_exec() Command interface is used for configuring and querying FW when EMADs are not available. During the time that the driver sets up the asynchronous queues, it polls the command interface for getting completions. 
Then, there is a short period when asynchronous queues work, but EMADs are not available (marked in the code as nopoll = true). During this time, we send commands via command interface, but we do not poll it, as we can get an interrupt for the completion. Completions of command interface are received from HW in EQ0 (event queue 0). The usage of EQ0 instead of polling is done only 4 times during initialization and one time during tear down, but it makes an overhead during lifetime of the driver. For each interrupt, we have to check if we get events in EQ0 or EQ1 and handle them. This is really ineffective, especially because of the fact that EQ0 is used only as part of driver init/fini. Instead, we can poll command interface for each call of cmd_exec(). It means that when we send a command via command interface (as EMADs are not available), we will poll it, regardless of availability of the asynchronous queues. This will allow us to configure later only EQ1 and simplify the flow. Remove 'nopoll' indication and change mlxsw_pci_cmd_exec() to poll till answer/timeout regardless of queues' state. For now, completions are handled also by EQ0, but it will be removed in next patch. Additional cleanups will be added in next patches. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/e674c70380ceda953e0e45a77334c5d22e69938f.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 48 +++++++++++-------------------- 1 file changed, 17 insertions(+), 31 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5688c14f7426..2ee397922936 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -111,7 +111,6 @@ struct mlxsw_pci { struct mlxsw_pci_mem_item out_mbox; struct mlxsw_pci_mem_item in_mbox; struct mutex lock; /* Lock access to command registers */ - bool nopoll; wait_queue_head_t wait; bool wait_done; struct { @@ -1105,8 +1104,6 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) goto err_rdqs_init; } - /* We have to poll in command interface until queues are initialized */ - mlxsw_pci->cmd.nopoll = true; return 0; err_rdqs_init: @@ -1120,7 +1117,6 @@ err_cqs_init: static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci) { - mlxsw_pci->cmd.nopoll = false; mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops); mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops); mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops); @@ -1848,9 +1844,9 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, { struct mlxsw_pci *mlxsw_pci = bus_priv; dma_addr_t in_mapaddr = 0, out_mapaddr = 0; - bool evreq = mlxsw_pci->cmd.nopoll; unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS); bool *p_wait_done = &mlxsw_pci->cmd.wait_done; + unsigned long end; int err; *p_status = MLXSW_CMD_STATUS_OK; @@ -1879,28 +1875,20 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, wmb(); /* all needs to be written before we write control register */ mlxsw_pci_write32(mlxsw_pci, CIR_CTRL, MLXSW_PCI_CIR_CTRL_GO_BIT | - (evreq ? 
MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) | (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) | opcode); - if (!evreq) { - unsigned long end; - - end = jiffies + timeout; - do { - u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL); + end = jiffies + timeout; + do { + u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL); - if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) { - *p_wait_done = true; - *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT; - break; - } - cond_resched(); - } while (time_before(jiffies, end)); - } else { - wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout); - *p_status = mlxsw_pci->cmd.comp.status; - } + if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) { + *p_wait_done = true; + *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT; + break; + } + cond_resched(); + } while (time_before(jiffies, end)); err = 0; if (*p_wait_done) { @@ -1917,14 +1905,12 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, */ __be32 tmp; - if (!evreq) { - tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, - CIR_OUT_PARAM_HI)); - memcpy(out_mbox, &tmp, sizeof(tmp)); - tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, - CIR_OUT_PARAM_LO)); - memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp)); - } + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_HI)); + memcpy(out_mbox, &tmp, sizeof(tmp)); + tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci, + CIR_OUT_PARAM_LO)); + memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp)); } else if (!err && out_mbox) { memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size); } -- cgit v1.2.3 From 7bc6a3098c3821b50e9ef54e22a1a9d701f6fe00 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:21 +0200 Subject: mlxsw: pci: Rename MLXSW_PCI_EQS_COUNT Currently we use MLXSW_PCI_EQS_COUNT event queues. A next patch will change the driver to initialize only EQ1, as EQ0 is not required anymore when we poll command interface. Rename the macro to MLXSW_PCI_EQS_MAX as later we will not initialize the maximum supported EQs, this value represents the maximum and a new macro will be added to represent the actual used queues. 
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/b08df430b62f23ca1aa3aaa257896d2d95aa7691.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 2ee397922936..4ee8d71f0697 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1059,7 +1059,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) if (num_sdqs + num_rdqs > num_cqs || num_sdqs < MLXSW_PCI_SDQS_MIN || - num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) { + num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_MAX) { dev_err(&pdev->dev, "Unsupported number of queues\n"); return -EINVAL; } @@ -1416,7 +1416,7 @@ static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id) struct mlxsw_pci_queue *q; int i; - for (i = 0; i < MLXSW_PCI_EQS_COUNT; i++) { + for (i = 0; i < MLXSW_PCI_EQS_MAX; i++) { q = mlxsw_pci_eq_get(mlxsw_pci, i); mlxsw_pci_queue_tasklet_schedule(q); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 7cdf0ce24f28..32a4f436d24d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -42,7 +42,7 @@ ((offset) + (type_offset) + (num) * 4) #define MLXSW_PCI_CQS_MAX 96 -#define MLXSW_PCI_EQS_COUNT 2 +#define MLXSW_PCI_EQS_MAX 2 #define MLXSW_PCI_EQ_ASYNC_NUM 0 #define MLXSW_PCI_EQ_COMP_NUM 1 -- cgit v1.2.3 From 6fc280a365151d09c748eba1a90ea12e4615317e Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:22 +0200 Subject: mlxsw: pci: Use only one event queue The device supports two event queues. EQ0 is used for command interface completion events. EQ1 is used for completion events of RDQ or SDQ. Currently, for each EQE (event queue element), we check the queue number and handle accordingly. More than that, for each interrupt we schedule tasklets for both EQs. This is really ineffective, especially because of the fact that EQ0 is used only as part of driver init/fini, when EMADs are not available. There is no point to schedule the tasklet for it and check each EQE. A previous patch changed the code to poll command interface for each use of it. It means that now there is no real reason to use EQ0, as we poll the command interface. Initialize only one event queue and use it as EQ1 (this is determined by queue number). Then, for each interrupt we can schedule the tasklet only for one queue and we do not have to check the queue number. This simplifies the code and should improve performance. Note that polling command interface is ok as we use it only as part of driver init/fini. 
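[Illustrative sketch, not mlxsw code] A compact userspace model of the command-interface polling this commit relies on: spin on a GO bit until it clears or a deadline passes. The register read is simulated and all names, constants and timings are invented for illustration.

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <time.h>

#define CIR_CTRL_GO_BIT 0x80000000u	/* invented bit layout */

static unsigned int fake_read_cir_ctrl(void)
{
	static int calls;

	/* pretend the device clears the GO bit on the third read */
	return ++calls < 3 ? CIR_CTRL_GO_BIT : 0;
}

static int cmd_exec_poll(int timeout_ms, unsigned int *status)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		unsigned int ctrl = fake_read_cir_ctrl();
		long elapsed_ms;

		if (!(ctrl & CIR_CTRL_GO_BIT)) {
			*status = ctrl & 0xff;	/* completion status field */
			return 0;
		}
		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_ms = (now.tv_sec - start.tv_sec) * 1000 +
			     (now.tv_nsec - start.tv_nsec) / 1000000;
		if (elapsed_ms > timeout_ms)
			return -1;	/* would be -ETIMEDOUT in a driver */
	}
}

int main(void)
{
	unsigned int status;

	if (cmd_exec_poll(1000, &status))
		fprintf(stderr, "command timed out\n");
	else
		printf("command done, status %u\n", status);
	return 0;
}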
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/23d764f5c032e4c363b98590b746a4b32d2bf900.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 54 ++++++++-------------------- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 2 files changed, 16 insertions(+), 40 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 4ee8d71f0697..6b8596f2feb9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -223,10 +223,10 @@ static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci, return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num); } -static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci, - u8 q_num) +static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci) { - return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, q_num); + /* There is only one EQ at index 0. */ + return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, 0); } static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci, @@ -754,16 +754,6 @@ static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) MLXSW_PCI_CQE01_SIZE; } -static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) -{ - mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); - mlxsw_pci->cmd.comp.out_param = - ((u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe)) << 32 | - mlxsw_pci_eqe_cmd_out_param_l_get(eqe); - mlxsw_pci->cmd.wait_done = true; - wake_up(&mlxsw_pci->cmd.wait); -} - static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) { struct mlxsw_pci_queue_elem_info *elem_info; @@ -786,7 +776,6 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; int credits = q->count >> 1; - bool cq_handle = false; u8 cqn, cq_count; int items = 0; char *eqe; @@ -794,23 +783,9 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) memset(&active_cqns, 0, sizeof(active_cqns)); while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { + cqn = mlxsw_pci_eqe_cqn_get(eqe); + set_bit(cqn, active_cqns); - /* Command interface completion events are always received on - * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events - * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1). - */ - switch (q->num) { - case MLXSW_PCI_EQ_ASYNC_NUM: - mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); - break; - case MLXSW_PCI_EQ_COMP_NUM: - cqn = mlxsw_pci_eqe_cqn_get(eqe); - set_bit(cqn, active_cqns); - cq_handle = true; - break; - default: - WARN_ON_ONCE(1); - } if (++items == credits) break; } @@ -821,9 +796,6 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - if (!cq_handle) - return; - cq_count = mlxsw_pci_cq_count(mlxsw_pci); for_each_set_bit(cqn, active_cqns, cq_count) { q = mlxsw_pci_cq_get(mlxsw_pci, cqn); @@ -837,6 +809,13 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, int i; int err; + /* We expect to initialize only one EQ, which gets num=0 as it is + * located at index zero. We use the EQ as EQ1, so set the number for + * future use. 
+ */ + WARN_ON_ONCE(q->num); + q->num = MLXSW_PCI_EQ_COMP_NUM; + q->consumer_counter = 0; for (i = 0; i < q->count; i++) { @@ -1077,7 +1056,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) mlxsw_pci->num_sdq_cqs = num_sdqs; err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, - num_eqs); + MLXSW_PCI_EQS_COUNT); if (err) { dev_err(&pdev->dev, "Failed to initialize event queues\n"); return err; @@ -1414,12 +1393,9 @@ static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id) { struct mlxsw_pci *mlxsw_pci = dev_id; struct mlxsw_pci_queue *q; - int i; - for (i = 0; i < MLXSW_PCI_EQS_MAX; i++) { - q = mlxsw_pci_eq_get(mlxsw_pci, i); - mlxsw_pci_queue_tasklet_schedule(q); - } + q = mlxsw_pci_eq_get(mlxsw_pci); + mlxsw_pci_queue_tasklet_schedule(q); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 32a4f436d24d..6bed495dcf0f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -43,7 +43,7 @@ #define MLXSW_PCI_CQS_MAX 96 #define MLXSW_PCI_EQS_MAX 2 -#define MLXSW_PCI_EQ_ASYNC_NUM 0 +#define MLXSW_PCI_EQS_COUNT 1 #define MLXSW_PCI_EQ_COMP_NUM 1 #define MLXSW_PCI_SDQS_MIN 2 /* EMAD and control traffic */ -- cgit v1.2.3 From 2c200863fcc78ec1797b0a7a36303e9466b0617d Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:23 +0200 Subject: mlxsw: pci: Remove unused wait queue The previous patch changed the code to do not handle command interface from event queue. With this change the wait queue is not used anymore. Remove it and 'wait_done' variable. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/f3af6a5a9dabd97d2920cefe475c6aa57767f504.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 6b8596f2feb9..6ca782f37c2f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -111,8 +110,6 @@ struct mlxsw_pci { struct mlxsw_pci_mem_item out_mbox; struct mlxsw_pci_mem_item in_mbox; struct mutex lock; /* Lock access to command registers */ - wait_queue_head_t wait; - bool wait_done; struct { u8 status; u64 out_param; @@ -1821,8 +1818,8 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, struct mlxsw_pci *mlxsw_pci = bus_priv; dma_addr_t in_mapaddr = 0, out_mapaddr = 0; unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS); - bool *p_wait_done = &mlxsw_pci->cmd.wait_done; unsigned long end; + bool wait_done; int err; *p_status = MLXSW_CMD_STATUS_OK; @@ -1846,7 +1843,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod); mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0); - *p_wait_done = false; + wait_done = false; wmb(); /* all needs to be written before we write control register */ mlxsw_pci_write32(mlxsw_pci, CIR_CTRL, @@ -1859,7 +1856,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL); if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) { - *p_wait_done = 
true; + wait_done = true; *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT; break; } @@ -1867,7 +1864,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, } while (time_before(jiffies, end)); err = 0; - if (*p_wait_done) { + if (wait_done) { if (*p_status) err = -EIO; } else { @@ -1965,7 +1962,6 @@ static int mlxsw_pci_cmd_init(struct mlxsw_pci *mlxsw_pci) int err; mutex_init(&mlxsw_pci->cmd.lock); - init_waitqueue_head(&mlxsw_pci->cmd.wait); err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox); if (err) -- cgit v1.2.3 From a0639236d42060c8f0e994ddf87fbb0f3cb2b048 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:24 +0200 Subject: mlxsw: pci: Make style change in mlxsw_pci_cq_tasklet() This function will be broken into several functions later. As preparation, reorder variables to reverse xmas tree. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/7170a8f4429ecb5a539b0374c621697778ff8363.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 6ca782f37c2f..84bad681021e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -662,9 +662,9 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) { struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; - char *cqe; - int items = 0; int credits = q->count >> 1; + int items = 0; + char *cqe; while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); -- cgit v1.2.3 From 1df7d871e3491de2f53afafedef8383185a3f613 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:25 +0200 Subject: mlxsw: pci: Break mlxsw_pci_cq_tasklet() into tasklets per queue type Completion queues are used for completions of RDQ or SDQ. Each completion queue is used for one DQ. The first CQs are used for SDQs and the rest are used for RDQs. Currently, for each CQE (completion queue element), we check 'sr' value (send/receive) to know if it is completion of RDQ or SDQ. Actually, we do not really have to check it, as according to the queue number we know if it handles completions of Rx or Tx. Break the tasklet into two - one for Rx (RDQ) and one for Tx (SDQ). Then, setup the appropriate tasklet for each queue as part of queue initialization. Use 'sr' value for unlikely case that we get completion with type that we do not expect. Call WARN_ON_ONCE() only after checking the value, to avoid calling this method for each completion. A next patch set will use NAPI to handle events, then we will have a separate poll method for Rx and Tx. This change is a preparation for NAPI usage. 
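[Illustrative sketch, not mlxsw code] A toy model of the CQ split described above: completion queues whose index falls below the SDQ count get a Tx handler, the rest get an Rx handler, and the choice is made once at init time rather than per completion. All names are invented.

#include <stdio.h>

struct cq {
	int num;
	void (*handler)(struct cq *cq);	/* chosen once at setup */
};

static void cq_tx_handler(struct cq *cq)
{
	printf("CQ%d: completing transmitted descriptors\n", cq->num);
}

static void cq_rx_handler(struct cq *cq)
{
	printf("CQ%d: passing received packets up\n", cq->num);
}

static void cq_setup(struct cq *cq, int num, int num_sdqs)
{
	cq->num = num;
	cq->handler = num < num_sdqs ? cq_tx_handler : cq_rx_handler;
}

int main(void)
{
	struct cq cqs[4];
	int num_sdqs = 2;

	for (int i = 0; i < 4; i++) {
		cq_setup(&cqs[i], i, num_sdqs);
		cqs[i].handler(&cqs[i]);	/* no per-completion branching */
	}
	return 0;
}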
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/50fbc366f8de54cb5dc72a7c4f394333ef71f1d0.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 86 ++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 84bad681021e..c10e6f22f818 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -35,6 +35,11 @@ enum mlxsw_pci_queue_type { #define MLXSW_PCI_QUEUE_TYPE_COUNT 4 +enum mlxsw_pci_cq_type { + MLXSW_PCI_CQ_SDQ, + MLXSW_PCI_CQ_RDQ, +}; + static const u16 mlxsw_pci_doorbell_type_offset[] = { MLXSW_PCI_DOORBELL_SDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_SDQ */ MLXSW_PCI_DOORBELL_RDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_RDQ */ @@ -658,7 +663,7 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) return elem; } -static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) +static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) { struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; @@ -671,23 +676,54 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + struct mlxsw_pci_queue *rdq; + + if (unlikely(sendq)) { + WARN_ON_ONCE(1); + continue; + } memcpy(ncqe, cqe, q->elem_size); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - if (sendq) { - struct mlxsw_pci_queue *sdq; + rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, + wqe_counter, q->cq.v, ncqe); + + if (++items == credits) + break; + } - sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); - mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, q->cq.v, ncqe); - } else { - struct mlxsw_pci_queue *rdq; + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); +} - rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); - mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->cq.v, ncqe); +static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) +{ + struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci *mlxsw_pci = q->pci; + int credits = q->count >> 1; + int items = 0; + char *cqe; + + while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { + u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + struct mlxsw_pci_queue *sdq; + + if (unlikely(!sendq)) { + WARN_ON_ONCE(1); + continue; } + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + + sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, + wqe_counter, q->cq.v, ncqe); + if (++items == credits) break; } @@ -695,6 +731,32 @@ static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); } +static enum mlxsw_pci_cq_type +mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci, + const struct mlxsw_pci_queue *q) +{ + /* Each CQ is mapped to one DQ. The first 'num_sdq_cqs' queues are used + * for SDQs and the rest are used for RDQs. 
+ */ + if (q->num < mlxsw_pci->num_sdq_cqs) + return MLXSW_PCI_CQ_SDQ; + + return MLXSW_PCI_CQ_RDQ; +} + +static void mlxsw_pci_cq_tasklet_setup(struct mlxsw_pci_queue *q, + enum mlxsw_pci_cq_type cq_type) +{ + switch (cq_type) { + case MLXSW_PCI_CQ_SDQ: + tasklet_setup(&q->tasklet, mlxsw_pci_cq_tx_tasklet); + break; + case MLXSW_PCI_CQ_RDQ: + tasklet_setup(&q->tasklet, mlxsw_pci_cq_rx_tasklet); + break; + } +} + static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { @@ -727,7 +789,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); if (err) return err; - tasklet_setup(&q->tasklet, mlxsw_pci_cq_tasklet); + mlxsw_pci_cq_tasklet_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q)); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); return 0; -- cgit v1.2.3 From 0cd1453b7e55064d06b49eebe34ffb43748ba12e Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:26 +0200 Subject: mlxsw: pci: Remove mlxsw_pci_sdq_count() The number of SDQs is stored as part of 'mlxsw_pci' structure. In some cases, the driver uses this value and in some cases it calls mlxsw_pci_sdq_count() to get the value. Align the code to use the stored value. This simplifies the code and makes it clearer that the value is always the same. Rename 'mlxsw_pci->num_sdq_cqs' to 'mlxsw_pci->num_sdqs' as now it is used not only in CQ context. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0c8788506d9af35d589dbf64be35a508fd63d681.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c10e6f22f818..592e93a530d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -123,7 +123,7 @@ struct mlxsw_pci { struct mlxsw_bus_info bus_info; const struct pci_device_id *id; enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ - u8 num_sdq_cqs; /* Number of CQs used for SDQs */ + u8 num_sdqs; /* Number of SDQs */ bool skip_reset; }; @@ -188,11 +188,6 @@ static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci, return queue_group->count; } -static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ); -} - static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) { return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); @@ -391,7 +386,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { struct mlxsw_pci_queue_elem_info *elem_info; - u8 sdq_count = mlxsw_pci_sdq_count(mlxsw_pci); + u8 sdq_count = mlxsw_pci->num_sdqs; int i; int err; @@ -457,7 +452,7 @@ static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, q->cq.v = mlxsw_pci->max_cqe_ver; if (q->cq.v == MLXSW_PCI_CQE_V2 && - q->num < mlxsw_pci->num_sdq_cqs && + q->num < mlxsw_pci->num_sdqs && !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core)) q->cq.v = MLXSW_PCI_CQE_V1; } @@ -735,10 +730,10 @@ static enum mlxsw_pci_cq_type mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci, const struct mlxsw_pci_queue *q) { - /* Each CQ is mapped to one DQ. 
The first 'num_sdq_cqs' queues are used + /* Each CQ is mapped to one DQ. The first 'num_sdqs' queues are used * for SDQs and the rest are used for RDQs. */ - if (q->num < mlxsw_pci->num_sdq_cqs) + if (q->num < mlxsw_pci->num_sdqs) return MLXSW_PCI_CQ_SDQ; return MLXSW_PCI_CQ_RDQ; @@ -1112,7 +1107,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) return -EINVAL; } - mlxsw_pci->num_sdq_cqs = num_sdqs; + mlxsw_pci->num_sdqs = num_sdqs; err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, MLXSW_PCI_EQS_COUNT); @@ -1780,7 +1775,7 @@ static struct mlxsw_pci_queue * mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, const struct mlxsw_tx_info *tx_info) { - u8 ctl_sdq_count = mlxsw_pci_sdq_count(mlxsw_pci) - 1; + u8 ctl_sdq_count = mlxsw_pci->num_sdqs - 1; u8 sdqn; if (tx_info->is_emad) { -- cgit v1.2.3 From 82238f0ddb4628af28e021e2ee07c7b7d97f2eb2 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:27 +0200 Subject: mlxsw: pci: Remove mlxsw_pci_cq_count() Currently, for each interrupt we call mlxsw_pci_cq_count() to determine the number of CQs. This call makes additional two function's calls. This can be removed by storing this value as part of structure 'mlxsw_pci', as we already do for number of SDQs. Remove the function and __mlxsw_pci_queue_count() which is now not used and store the value instead. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/f08ad113e8160678f3c8d401382a696c6c7f44c7.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 592e93a530d0..094b9d752a02 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -123,6 +123,7 @@ struct mlxsw_pci { struct mlxsw_bus_info bus_info; const struct pci_device_id *id; enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ + u8 num_cqs; /* Number of CQs */ u8 num_sdqs; /* Number of SDQs */ bool skip_reset; }; @@ -179,20 +180,6 @@ mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci, return &mlxsw_pci->queues[q_type]; } -static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci, - enum mlxsw_pci_queue_type q_type) -{ - struct mlxsw_pci_queue_type_group *queue_group; - - queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type); - return queue_group->count; -} - -static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); -} - static struct mlxsw_pci_queue * __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci, enum mlxsw_pci_queue_type q_type, u8 q_num) @@ -850,7 +837,7 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - cq_count = mlxsw_pci_cq_count(mlxsw_pci); + cq_count = mlxsw_pci->num_cqs; for_each_set_bit(cqn, active_cqns, cq_count) { q = mlxsw_pci_cq_get(mlxsw_pci, cqn); mlxsw_pci_queue_tasklet_schedule(q); @@ -1107,6 +1094,7 @@ static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox) return -EINVAL; } + mlxsw_pci->num_cqs = num_cqs; mlxsw_pci->num_sdqs = num_sdqs; err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops, -- cgit v1.2.3 
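[Illustrative sketch, not mlxsw code] A tiny illustration of the pattern used in the commit above: derive the queue count once during initialization and keep it in the device structure so the interrupt path only reads a cached field instead of walking helper calls. Structure and field names are made up.

#include <stdio.h>

struct pci_core {
	unsigned char num_cqs;	/* filled once during queue setup */
};

static void queues_init(struct pci_core *core, unsigned char num_cqs)
{
	core->num_cqs = num_cqs;	/* cache the value for the hot path */
}

static void irq_handler(struct pci_core *core)
{
	/* hot path: no helper-call chain, just the cached count */
	for (unsigned char cqn = 0; cqn < core->num_cqs; cqn++)
		printf("schedule CQ%u handler\n", cqn);
}

int main(void)
{
	struct pci_core core;

	queues_init(&core, 4);
	irq_handler(&core);
	return 0;
}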
From 77c6e27df9e50dfe7deb5a0cfe8c30175b88d89a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Tue, 2 Apr 2024 15:54:28 +0200 Subject: mlxsw: pci: Store DQ pointer as part of CQ structure Currently, for each completion, we check the number of descriptor queue and take it via mlxsw_pci_{sdq,rdq}_get(). This is inefficient, the DQ should be the same for all the completions in CQ, as each CQ handles only one DQ - SDQ or RDQ. This mapping is handled as part of DQ initialization via mlxsw_cmd_mbox_sw2hw_dq_cq_set(). Instead, as part of DQ initialization, set DQ pointer in the appropriate CQ structure. When we handle completions, warn in case that the DQ number that we expect is different from the number we get in the CQE. Call WARN_ON_ONCE() only after checking the value, to avoid calling this method for each completion. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/a5b2559cd6d532c120f3194f89a1e257110318f1.1712062203.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 41 +++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 094b9d752a02..4d617057af25 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -86,6 +86,7 @@ struct mlxsw_pci_queue { struct mlxsw_pci *pci; struct { enum mlxsw_pci_cqe_v v; + struct mlxsw_pci_queue *dq; } cq; }; @@ -194,13 +195,6 @@ static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ, q_num); } -static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci, - u8 q_num) -{ - return __mlxsw_pci_queue_get(mlxsw_pci, - MLXSW_PCI_QUEUE_TYPE_RDQ, q_num); -} - static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci, u8 q_num) { @@ -265,7 +259,9 @@ static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q, static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { + struct mlxsw_pci_queue *cq; int tclass; + u8 cq_num; int lp; int i; int err; @@ -278,7 +274,8 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE; /* Set CQ of same number of this SDQ. */ - mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); + cq_num = q->num; + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num); mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp); mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ @@ -291,6 +288,9 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num); if (err) return err; + + cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num); + cq->cq.dq = q; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return 0; } @@ -374,6 +374,8 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, { struct mlxsw_pci_queue_elem_info *elem_info; u8 sdq_count = mlxsw_pci->num_sdqs; + struct mlxsw_pci_queue *cq; + u8 cq_num; int i; int err; @@ -383,7 +385,8 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, /* Set CQ of same number of this RDQ with base * above SDQ count as the lower ones are assigned to SDQs. 
*/ - mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, sdq_count + q->num); + cq_num = sdq_count + q->num; + mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, cq_num); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i); @@ -395,6 +398,9 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, if (err) return err; + cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num); + cq->cq.dq = q; + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); for (i = 0; i < q->count; i++) { @@ -415,6 +421,7 @@ rollback: elem_info = mlxsw_pci_queue_elem_info_get(q, i); mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); } + cq->cq.dq = NULL; mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); return err; @@ -648,6 +655,7 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) { struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci_queue *rdq = q->cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; int credits = q->count >> 1; int items = 0; @@ -658,17 +666,20 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; - struct mlxsw_pci_queue *rdq; if (unlikely(sendq)) { WARN_ON_ONCE(1); continue; } + if (unlikely(dqn != rdq->num)) { + WARN_ON_ONCE(1); + continue; + } + memcpy(ncqe, cqe, q->elem_size); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, wqe_counter, q->cq.v, ncqe); @@ -682,6 +693,7 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) { struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci_queue *sdq = q->cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; int credits = q->count >> 1; int items = 0; @@ -692,17 +704,20 @@ static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; - struct mlxsw_pci_queue *sdq; if (unlikely(!sendq)) { WARN_ON_ONCE(1); continue; } + if (unlikely(dqn != sdq->num)) { + WARN_ON_ONCE(1); + continue; + } + memcpy(ncqe, cqe, q->elem_size); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, wqe_counter, q->cq.v, ncqe); -- cgit v1.2.3 From d72b735712e65cccb85f81e6b26a14ea53cc6438 Mon Sep 17 00:00:00 2001 From: Nikita Kiryushin Date: Mon, 1 Apr 2024 22:14:18 +0300 Subject: tg3: Remove residual error handling in tg3_suspend As of now, tg3_power_down_prepare always ends with success, but the error handling code from former tg3_set_power_state call is still here. This code became unreachable in commit c866b7eac073 ("tg3: Do not use legacy PCI power management"). Remove (now unreachable) error handling code for simplification and change tg3_power_down_prepare to a void function as its result is no more checked. 
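[Illustrative sketch, not tg3 code] A simplified, standalone illustration of the cleanup above: once a helper can no longer fail, make it void and drop the caller's dead recovery path.

#include <stdio.h>

static void power_down_prepare(void)
{
	/* ... programming that cannot fail anymore ... */
	printf("prepared for power down\n");
}

static int suspend(void)
{
	/* before: err = power_down_prepare(); if (err) { <recovery> } */
	power_down_prepare();
	return 0;
}

int main(void)
{
	return suspend();
}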
Signed-off-by: Nikita Kiryushin Reviewed-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240401191418.361747-1-kiryushin@ancud.ru Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/tg3.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 62ff4381ac83..e6ff3c9bd7e5 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4019,7 +4019,7 @@ static int tg3_power_up(struct tg3 *tp) static int tg3_setup_phy(struct tg3 *, bool); -static int tg3_power_down_prepare(struct tg3 *tp) +static void tg3_power_down_prepare(struct tg3 *tp) { u32 misc_host_ctrl; bool device_should_wake, do_low_power; @@ -4263,7 +4263,7 @@ static int tg3_power_down_prepare(struct tg3 *tp) tg3_ape_driver_state_change(tp, RESET_KIND_SHUTDOWN); - return 0; + return; } static void tg3_power_down(struct tg3 *tp) @@ -18084,7 +18084,6 @@ static int tg3_suspend(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct tg3 *tp = netdev_priv(dev); - int err = 0; rtnl_lock(); @@ -18108,32 +18107,11 @@ static int tg3_suspend(struct device *device) tg3_flag_clear(tp, INIT_COMPLETE); tg3_full_unlock(tp); - err = tg3_power_down_prepare(tp); - if (err) { - int err2; - - tg3_full_lock(tp, 0); - - tg3_flag_set(tp, INIT_COMPLETE); - err2 = tg3_restart_hw(tp, true); - if (err2) - goto out; - - tg3_timer_start(tp); - - netif_device_attach(dev); - tg3_netif_start(tp); - -out: - tg3_full_unlock(tp); - - if (!err2) - tg3_phy_start(tp); - } + tg3_power_down_prepare(tp); unlock: rtnl_unlock(); - return err; + return 0; } static int tg3_resume(struct device *device) -- cgit v1.2.3 From 04172043bd218dbbfc0c13bd4630142eb3f027cb Mon Sep 17 00:00:00 2001 From: linke li Date: Wed, 3 Apr 2024 10:54:00 +0800 Subject: net: ethernet: mtk_eth_soc: Reuse value using READ_ONCE instead of re-rereading it In mtk_flow_entry_update_l2, the hwe->ib1 is read using READ_ONCE at the beginning of the function, checked, and then re-read from hwe->ib1, may void all guarantees of the checks. Reuse the value that was read by READ_ONCE to ensure the consistency of the ib1 throughout the function. Signed-off-by: linke li Link: https://lore.kernel.org/r/tencent_C699E9540505523424F11A9BD3D21B86840A@qq.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 6ce0db3a1a92..0acee405a749 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -580,7 +580,7 @@ mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) idle = cur_idle; entry->data.ib1 &= ~ib1_ts_mask; - entry->data.ib1 |= hwe->ib1 & ib1_ts_mask; + entry->data.ib1 |= ib1 & ib1_ts_mask; } } -- cgit v1.2.3 From d5ab32e9b02dc228444add3502261ad68391bf30 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Tue, 2 Apr 2024 02:37:47 -0700 Subject: bnxt_en: Add delay to handle Downstream Port Containment (DPC) AER In case of DPC, after issuing the hot reset, the kernel waits for 100ms for the device to complete the reset. However on some older chips, the firmware may take up to 1 second to complete the reset, only after which the driver can restart the card. 
Introduce delay of 900ms to handle this scenario on the older chipsets. Signed-off-by: Vikas Gupta Reviewed-by: Michael Chan Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-2-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b4db4b1aaffb..6e24a341ad28 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15550,6 +15550,10 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) netdev_info(bp->dev, "PCI Slot Reset\n"); + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && + test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) + msleep(900); + rtnl_lock(); if (pci_enable_device(pdev)) { -- cgit v1.2.3 From 8635ae8e99a670b38198b7561c6c57b13418f108 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 2 Apr 2024 02:37:48 -0700 Subject: bnxt_en: Enable XPS by default on driver load Enable XPS on default during NIC open. The choice of Tx queue is based on the CPU executing the thread that submits the Tx request. The pool of Tx queues will be spread evenly across both device-attached NUMA nodes(local) and remote NUMA nodes. Signed-off-by: Somnath Kotur Reviewed-by: Michael Chan Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-3-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 46 ++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6e24a341ad28..44b9332c147e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11804,6 +11804,46 @@ static void bnxt_cfg_usr_fltrs(struct bnxt *bp) bnxt_cfg_one_usr_fltr(bp, usr_fltr); } +static int bnxt_set_xps_mapping(struct bnxt *bp) +{ + int numa_node = dev_to_node(&bp->pdev->dev); + unsigned int q_idx, map_idx, cpu, i; + const struct cpumask *cpu_mask_ptr; + int nr_cpus = num_online_cpus(); + cpumask_t *q_map; + int rc = 0; + + q_map = kcalloc(bp->tx_nr_rings_per_tc, sizeof(*q_map), GFP_KERNEL); + if (!q_map) + return -ENOMEM; + + /* Create CPU mask for all TX queues across MQPRIO traffic classes. + * Each TC has the same number of TX queues. The nth TX queue for each + * TC will have the same CPU mask. 
+ */ + for (i = 0; i < nr_cpus; i++) { + map_idx = i % bp->tx_nr_rings_per_tc; + cpu = cpumask_local_spread(i, numa_node); + cpu_mask_ptr = get_cpu_mask(cpu); + cpumask_or(&q_map[map_idx], &q_map[map_idx], cpu_mask_ptr); + } + + /* Register CPU mask for each TX queue except the ones marked for XDP */ + for (q_idx = 0; q_idx < bp->dev->real_num_tx_queues; q_idx++) { + map_idx = q_idx % bp->tx_nr_rings_per_tc; + rc = netif_set_xps_queue(bp->dev, &q_map[map_idx], q_idx); + if (rc) { + netdev_warn(bp->dev, "Error setting XPS for q:%d\n", + q_idx); + break; + } + } + + kfree(q_map); + + return rc; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -11866,8 +11906,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } } - if (irq_re_init) + if (irq_re_init) { udp_tunnel_nic_reset_ntf(bp->dev); + rc = bnxt_set_xps_mapping(bp); + if (rc) + netdev_warn(bp->dev, "failed to set xps mapping\n"); + } if (bp->tx_nr_rings_xdp < num_possible_cpus()) { if (!static_key_enabled(&bnxt_xdp_locking_key)) -- cgit v1.2.3 From fba2e4e5dbab399eb8801801471ac69f9baeba98 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 2 Apr 2024 02:37:49 -0700 Subject: bnxt_en: Allocate page pool per numa node Driver's Page Pool allocation code looks at the node local to the PCIe device to determine where to allocate memory. In scenarios where the core count per NUMA node is low (< default rings) it makes sense to exhaust page pool allocations on Node 0 first and then moving on to allocating page pools for the remaining rings from Node 1. With this patch, and the following configuration on the NIC $ ethtool -L ens1f0np0 combined 16 (core count/node = 12, first 12 rings on node#0, last 4 rings node#1) and traffic redirected to a ring on node#1 , we see a performance improvement of ~20% Signed-off-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Michael Chan Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-4-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 44b9332c147e..9fca1dee6486 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3559,14 +3559,15 @@ static void bnxt_free_rx_rings(struct bnxt *bp) } static int bnxt_alloc_rx_page_pool(struct bnxt *bp, - struct bnxt_rx_ring_info *rxr) + struct bnxt_rx_ring_info *rxr, + int numa_node) { struct page_pool_params pp = { 0 }; pp.pool_size = bp->rx_agg_ring_size; if (BNXT_RX_PAGE_MODE(bp)) pp.pool_size += bp->rx_ring_size; - pp.nid = dev_to_node(&bp->pdev->dev); + pp.nid = numa_node; pp.napi = &rxr->bnapi->napi; pp.netdev = bp->dev; pp.dev = &bp->pdev->dev; @@ -3586,7 +3587,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, static int bnxt_alloc_rx_rings(struct bnxt *bp) { - int i, rc = 0, agg_rings = 0; + int numa_node = dev_to_node(&bp->pdev->dev); + int i, rc = 0, agg_rings = 0, cpu; if (!bp->rx_ring) return -ENOMEM; @@ -3597,10 +3599,15 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; + int cpu_node; ring = &rxr->rx_ring_struct; - rc = bnxt_alloc_rx_page_pool(bp, rxr); + cpu = 
cpumask_local_spread(i, numa_node); + cpu_node = cpu_to_node(cpu); + netdev_dbg(bp->dev, "Allocating page pool for rx_ring[%d] on numa_node: %d\n", + i, cpu_node); + rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node); if (rc) return rc; -- cgit v1.2.3 From 1614f06e09ad6b271c6ed8ffd87ccf89ec51526c Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 2 Apr 2024 02:37:50 -0700 Subject: bnxt_en: Change bnxt_rx_xdp function prototype Change bnxt_rx_xdp() to take a pointer to xdp instead of stack variable. This is in prepartion for the XDP metadata patch change where the BPF program can change the value of the xdp.meta_data. Signed-off-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Michael Chan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jesper Dangaard Brouer Cc: John Fastabend Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-5-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 28 +++++++++++++-------------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9fca1dee6486..5e54b7cacc45 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2104,7 +2104,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (xdp_active) { - if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) { + if (bnxt_rx_xdp(bp, rxr, cons, &xdp, data, &data_ptr, &len, event)) { rc = 1; goto next_rx; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4079538bc310..6b904b4f4d34 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -222,7 +222,7 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, * false - packet should be passed to the stack. 
*/ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, u8 **data_ptr, + struct xdp_buff *xdp, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event) { struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); @@ -244,9 +244,9 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, txr = rxr->bnapi->tx_ring[0]; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ - orig_data = xdp.data; + orig_data = xdp->data; - act = bpf_prog_run_xdp(xdp_prog, &xdp); + act = bpf_prog_run_xdp(xdp_prog, xdp); tx_avail = bnxt_tx_avail(bp, txr); /* If the tx ring is not full, we must not update the rx producer yet @@ -255,10 +255,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, if (tx_avail != bp->tx_ring_size) *event &= ~BNXT_RX_EVENT; - *len = xdp.data_end - xdp.data; - if (orig_data != xdp.data) { - offset = xdp.data - xdp.data_hard_start; - *data_ptr = xdp.data_hard_start + offset; + *len = xdp->data_end - xdp->data; + if (orig_data != xdp->data) { + offset = xdp->data - xdp->data_hard_start; + *data_ptr = xdp->data_hard_start + offset; } switch (act) { @@ -270,8 +270,8 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, mapping = rx_buf->mapping - bp->rx_dma_offset; *event &= BNXT_TX_CMP_EVENT; - if (unlikely(xdp_buff_has_frags(&xdp))) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp); + if (unlikely(xdp_buff_has_frags(xdp))) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); tx_needed += sinfo->nr_frags; *event = BNXT_AGG_EVENT; @@ -279,7 +279,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, if (tx_avail < tx_needed) { trace_xdp_exception(bp->dev, xdp_prog, act); - bnxt_xdp_buff_frags_free(rxr, &xdp); + bnxt_xdp_buff_frags_free(rxr, xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; } @@ -289,7 +289,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event |= BNXT_TX_EVENT; __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, - NEXT_RX(rxr->rx_prod), &xdp); + NEXT_RX(rxr->rx_prod), xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; case XDP_REDIRECT: @@ -306,12 +306,12 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { trace_xdp_exception(bp->dev, xdp_prog, act); - bnxt_xdp_buff_frags_free(rxr, &xdp); + bnxt_xdp_buff_frags_free(rxr, xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; } - if (xdp_do_redirect(bp->dev, &xdp, xdp_prog)) { + if (xdp_do_redirect(bp->dev, xdp, xdp_prog)) { trace_xdp_exception(bp->dev, xdp_prog, act); page_pool_recycle_direct(rxr->page_pool, page); return true; @@ -326,7 +326,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, trace_xdp_exception(bp->dev, xdp_prog, act); fallthrough; case XDP_DROP: - bnxt_xdp_buff_frags_free(rxr, &xdp); + bnxt_xdp_buff_frags_free(rxr, xdp); bnxt_reuse_rx_data(rxr, cons, page); break; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 5e412c5655ba..0122782400b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -18,7 +18,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct xdp_buff *xdp); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget); bool 
bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, u8 **data_ptr, + struct xdp_buff *xdp, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); int bnxt_xdp_xmit(struct net_device *dev, int num_frames, -- cgit v1.2.3 From 0ae1fafc8be6d4271a8ad66b4695e123a4e8cef1 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 2 Apr 2024 02:37:51 -0700 Subject: bnxt_en: Add XDP Metadata support - Change the last arg to xdp_prepare_buff to true from false. - Ensure that when XDP_PASS is returned the xdp->data_meta area is copied to the skb->data area. Account for the meta data size on skb allocation and do a pull after to move it to the "reserved" zone. Signed-off-by: Somnath Kotur Reviewed-by: Michael Chan Reviewed-by: Pavan Chebbi Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jesper Dangaard Brouer Cc: John Fastabend Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-6-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 ++++++++++++++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- 2 files changed, 41 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5e54b7cacc45..3982858a59a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1296,9 +1296,9 @@ static int bnxt_agg_bufs_valid(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, return RX_AGG_CMP_VALID(agg, *raw_cons); } -static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, - unsigned int len, - dma_addr_t mapping) +static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi, u8 *data, + unsigned int len, + dma_addr_t mapping) { struct bnxt *bp = bnapi->bp; struct pci_dev *pdev = bp->pdev; @@ -1318,6 +1318,39 @@ static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, bp->rx_dir); skb_put(skb, len); + + return skb; +} + +static struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, + unsigned int len, + dma_addr_t mapping) +{ + return bnxt_copy_data(bnapi, data, len, mapping); +} + +static struct sk_buff *bnxt_copy_xdp(struct bnxt_napi *bnapi, + struct xdp_buff *xdp, + unsigned int len, + dma_addr_t mapping) +{ + unsigned int metasize = 0; + u8 *data = xdp->data; + struct sk_buff *skb; + + len = xdp->data_end - xdp->data_meta; + metasize = xdp->data - xdp->data_meta; + data = xdp->data_meta; + + skb = bnxt_copy_data(bnapi, data, len, mapping); + if (!skb) + return skb; + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } + return skb; } @@ -2111,7 +2144,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (len <= bp->rx_copy_thresh) { - skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); + if (!xdp_active) + skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); + else + skb = bnxt_copy_xdp(bnapi, &xdp, len, dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { if (agg_bufs) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 6b904b4f4d34..345681d5007e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -197,7 +197,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct 
bnxt_rx_ring_info *rxr, dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); - xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); + xdp_prepare_buff(xdp, data_ptr - offset, offset, len, true); } void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, -- cgit v1.2.3 From 4e474addc05a51c3515a88b54dbf21a10b747b23 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Tue, 2 Apr 2024 02:37:52 -0700 Subject: bnxt_en: Update firmware interface to 1.10.3.39 This updated interface supports backing store APIs to configure host FW trace buffers, updates transceivers ID types, updates to TrueFlow features and other changes for the basic L2 features. Signed-off-by: Pavan Chebbi Reviewed-by: Michael Chan Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-7-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 184 +++++++++++++++++++------- 1 file changed, 137 insertions(+), 47 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index e957abd704db..06ea86c80be1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -468,6 +468,10 @@ struct cmd_nums { #define HWRM_TF_GLOBAL_CFG_GET 0x2fdUL #define HWRM_TF_IF_TBL_SET 0x2feUL #define HWRM_TF_IF_TBL_GET 0x2ffUL + #define HWRM_TF_RESC_USAGE_SET 0x300UL + #define HWRM_TF_RESC_USAGE_QUERY 0x301UL + #define HWRM_TF_TBL_TYPE_ALLOC 0x302UL + #define HWRM_TF_TBL_TYPE_FREE 0x303UL #define HWRM_TFC_TBL_SCOPE_QCAPS 0x380UL #define HWRM_TFC_TBL_SCOPE_ID_ALLOC 0x381UL #define HWRM_TFC_TBL_SCOPE_CONFIG 0x382UL @@ -495,6 +499,7 @@ struct cmd_nums { #define HWRM_TFC_IF_TBL_SET 0x398UL #define HWRM_TFC_IF_TBL_GET 0x399UL #define HWRM_TFC_TBL_SCOPE_CONFIG_GET 0x39aUL + #define HWRM_TFC_RESC_USAGE_QUERY 0x39bUL #define HWRM_SV 0x400UL #define HWRM_DBG_READ_DIRECT 0xff10UL #define HWRM_DBG_READ_INDIRECT 0xff11UL @@ -604,8 +609,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 15 -#define HWRM_VERSION_STR "1.10.3.15" +#define HWRM_VERSION_RSVD 39 +#define HWRM_VERSION_STR "1.10.3.39" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -1328,8 +1333,9 @@ struct hwrm_async_event_cmpl_error_report_base { #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1478,6 +1484,30 @@ struct hwrm_async_event_cmpl_error_report_thermal { #define 
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING }; +/* hwrm_async_event_cmpl_error_report_dual_data_rate_not_supported (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_dual_data_rate_not_supported { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DUAL_DATA_RATE_NOT_SUPPORTED_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED +}; + /* hwrm_func_reset_input (size:192b/24B) */ struct hwrm_func_reset_input { __le16 req_type; @@ -1781,6 +1811,9 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED 0x100000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_UDCC_SUPPORTED 0x200000UL #define FUNC_QCAPS_RESP_FLAGS_EXT2_TIMED_TX_SO_TXTIME_SUPPORTED 0x400000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED 0x800000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_TF_INGRESS_NIC_FLOW_SUPPORTED 0x1000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT2_LPBK_STATS_SUPPORTED 0x2000000UL __le16 tunnel_disable_flag; #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN 0x1UL #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_NGE 0x2UL @@ -1791,10 +1824,8 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_MPLS 0x40UL #define FUNC_QCAPS_RESP_TUNNEL_DISABLE_FLAG_DISABLE_PPPOE 0x80UL __le16 xid_partition_cap; - #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_KTLS_TKC 0x1UL - #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_KTLS_RKC 0x2UL - #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_QUIC_TKC 0x4UL - #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_QUIC_RKC 0x8UL + #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_TX_CK 0x1UL + #define FUNC_QCAPS_RESP_XID_PARTITION_CAP_RX_CK 0x2UL u8 device_serial_number[8]; __le16 ctxs_per_partition; u8 unused_2[2]; @@ -1844,6 +1875,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED 0x1000UL #define FUNC_QCFG_RESP_FLAGS_MULTI_ROOT 0x2000UL #define FUNC_QCFG_RESP_FLAGS_ENABLE_RDMA_SRIOV 0x4000UL + #define 
FUNC_QCFG_RESP_FLAGS_ROCE_VNIC_ID_VALID 0x8000UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1955,7 +1987,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_DB_PAGE_SIZE_2MB 0x9UL #define FUNC_QCFG_RESP_DB_PAGE_SIZE_4MB 0xaUL #define FUNC_QCFG_RESP_DB_PAGE_SIZE_LAST FUNC_QCFG_RESP_DB_PAGE_SIZE_4MB - u8 unused_2[2]; + __le16 roce_vnic_id; __le32 partition_min_bw; #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_MASK 0xfffffffUL #define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_SFT 0 @@ -2003,6 +2035,8 @@ struct hwrm_func_qcfg_output { __le32 roce_max_srq_per_vf; __le32 roce_max_gid_per_vf; __le16 xid_partition_cfg; + #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL + #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL u8 unused_7; u8 valid; }; @@ -2229,10 +2263,8 @@ struct hwrm_func_cfg_input { __le32 roce_max_srq_per_vf; __le32 roce_max_gid_per_vf; __le16 xid_partition_cfg; - #define FUNC_CFG_REQ_XID_PARTITION_CFG_KTLS_TKC 0x1UL - #define FUNC_CFG_REQ_XID_PARTITION_CFG_KTLS_RKC 0x2UL - #define FUNC_CFG_REQ_XID_PARTITION_CFG_QUIC_TKC 0x4UL - #define FUNC_CFG_REQ_XID_PARTITION_CFG_QUIC_RKC 0x8UL + #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL + #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL __le16 unused_2; }; @@ -2416,6 +2448,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT 0x100UL #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL #define FUNC_DRV_RGTR_REQ_FLAGS_ASYM_QUEUE_CFG_SUPPORT 0x400UL + #define FUNC_DRV_RGTR_REQ_FLAGS_TF_INGRESS_NIC_FLOW_MODE 0x800UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -3636,19 +3669,22 @@ struct hwrm_func_backing_store_cfg_v2_input { #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC 0x13UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC 0x14UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QUIC_TKC 0x1aUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QUIC_RKC 0x1bUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; __le32 flags; #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL @@ -3707,17 +3743,22 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define 
FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC 0x13UL - #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TX_CK 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RX_CK 0x14UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QUIC_TKC 0x1aUL - #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QUIC_RKC 0x1bUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TBL_SCOPE 0x1cUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_XID_PARTITION_TABLE 0x1dUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3740,15 +3781,18 @@ struct hwrm_func_backing_store_qcfg_v2_output { #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC 0x13UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC 0x14UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QUIC_TKC 0x1aUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QUIC_RKC 0x1bUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TBL_SCOPE 0x1cUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID __le16 instance; __le32 flags; __le64 page_dir; @@ -3841,19 +3885,22 @@ struct hwrm_func_backing_store_qcaps_v2_input { #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_KTLS_TKC 0x13UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_KTLS_RKC 0x14UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL #define 
FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW 0x16UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW 0x17UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW 0x18UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_TKC 0x1aUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_RKC 0x1bUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE 0x1cUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID u8 rsvd[6]; }; @@ -3873,19 +3920,22 @@ struct hwrm_func_backing_store_qcaps_v2_output { #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_KTLS_TKC 0x13UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_KTLS_RKC 0x14UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SQ_DB_SHADOW 0x16UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RQ_DB_SHADOW 0x17UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ_DB_SHADOW 0x18UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ_DB_SHADOW 0x19UL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QUIC_TKC 0x1aUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QUIC_RKC 0x1bUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TBL_SCOPE 0x1cUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_XID_PARTITION 0x1dUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL - #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT_TRACE 0x1eUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRT2_TRACE 0x1fUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT_TRACE 0x20UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CRT2_TRACE 0x21UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP0_TRACE 0x22UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_L2_HWRM_TRACE 0x23UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ROCE_HWRM_TRACE 0x24UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID __le16 entry_size; __le32 flags; #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL @@ -3990,6 +4040,7 @@ struct hwrm_func_drv_if_change_output { __le32 flags; #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE 0x1UL #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE 0x2UL + #define FUNC_DRV_IF_CHANGE_RESP_FLAGS_CAPS_CHANGE 0x4UL u8 unused_0[3]; u8 valid; }; @@ -4472,7 +4523,11 @@ struct hwrm_port_phy_qcfg_output { #define 
PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP (0xcUL << 24) #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPPLUS (0xdUL << 24) #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 (0x11UL << 24) - #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_LAST PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPDD (0x18UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP112 (0x1eUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_SFPDD (0x1fUL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_CSFP (0x20UL << 24) + #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_LAST PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_CSFP __le16 fec_cfg; #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED 0x1UL #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED 0x2UL @@ -7380,7 +7435,7 @@ struct hwrm_cfa_l2_filter_free_output { u8 valid; }; -/* hwrm_cfa_l2_filter_cfg_input (size:320b/40B) */ +/* hwrm_cfa_l2_filter_cfg_input (size:384b/48B) */ struct hwrm_cfa_l2_filter_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -7399,12 +7454,22 @@ struct hwrm_cfa_l2_filter_cfg_input { #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_L2 (0x1UL << 2) #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE (0x2UL << 2) #define CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_TRAFFIC_ROCE + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_MASK 0x30UL + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_SFT 4 + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_NO_UPDATE (0x0UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_BYPASS_LKUP (0x1UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP (0x2UL << 4) + #define CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_LAST CFA_L2_FILTER_CFG_REQ_FLAGS_REMAP_OP_ENABLE_LKUP __le32 enables; #define CFA_L2_FILTER_CFG_REQ_ENABLES_DST_ID 0x1UL #define CFA_L2_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID 0x2UL + #define CFA_L2_FILTER_CFG_REQ_ENABLES_PROF_FUNC 0x4UL + #define CFA_L2_FILTER_CFG_REQ_ENABLES_L2_CONTEXT_ID 0x8UL __le64 l2_filter_id; __le32 dst_id; __le32 new_mirror_vnic_id; + __le32 prof_func; + __le32 l2_context_id; }; /* hwrm_cfa_l2_filter_cfg_output (size:128b/16B) */ @@ -8466,7 +8531,15 @@ struct hwrm_tunnel_dst_port_query_input { #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_SRV6 0xfUL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE 0x11UL - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GRE + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 u8 tunnel_next_proto; u8 unused_0[6]; }; @@ -8514,7 +8587,15 @@ struct hwrm_tunnel_dst_port_alloc_input { #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_SRV6 0xfUL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE 0x11UL - #define 
TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GRE + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 u8 tunnel_next_proto; __be16 tunnel_dst_port_val; u8 unused_0[4]; @@ -8565,7 +8646,15 @@ struct hwrm_tunnel_dst_port_free_input { #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_SRV6 0xfUL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_GPE 0x10UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE 0x11UL - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GRE + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR 0x12UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES01 0x13UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES02 0x14UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES03 0x15UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES04 0x16UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES05 0x17UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES06 0x18UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 0x19UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_LAST TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_ULP_DYN_UPAR_RES07 u8 tunnel_next_proto; __le16 tunnel_dst_port_id; u8 unused_0[4]; @@ -8860,7 +8949,7 @@ struct hwrm_stat_generic_qstats_output { u8 valid; }; -/* generic_sw_hw_stats (size:1408b/176B) */ +/* generic_sw_hw_stats (size:1472b/184B) */ struct generic_sw_hw_stats { __le64 pcie_statistics_tx_tlp; __le64 pcie_statistics_rx_tlp; @@ -8884,6 +8973,7 @@ struct generic_sw_hw_stats { __le64 hw_db_recov_dbs_dropped; __le64 hw_db_recov_drops_serviced; __le64 hw_db_recov_dbs_recovered; + __le64 hw_db_recov_oo_drop_count; }; /* hwrm_fw_reset_input (size:192b/24B) */ -- cgit v1.2.3 From e193f53aed21ad8fed5b87238c006090d75c9d18 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 2 Apr 2024 02:37:53 -0700 Subject: bnxt_en: Add warning message about disallowed speed change Some chips may not allow changing default speed when dual rate transceivers modules are used. Firmware on those chips will indicate the same to the driver. Add a warning message when speed change is not supported because a dual rate transceiver is detected by the NIC. 
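As a rough illustration of how this surfaces to an operator (the interface name, speed value, and log prefix below are placeholders, not part of the patch), attempting to force a fixed speed on a board with such a dual rate module should produce the new warning in the kernel log once the firmware raises the async error report handled in the diff below:

$ ethtool -s ens1f0np0 speed 100000 duplex full autoneg off
$ dmesg | tail -1
... Speed change not supported with dual rate transceivers on this board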
Signed-off-by: Sreekanth Reddy Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Reviewed-by: Michael Chan Signed-off-by: Pavan Chebbi Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240402093753.331120-8-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3982858a59a1..795f3f957eb5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2525,6 +2525,9 @@ static bool bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) } return false; } + case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED: + netdev_warn(bp->dev, "Speed change not supported with dual rate transceivers on this board\n"); + break; default: netdev_err(bp->dev, "FW reported unknown error type %u\n", err_type); -- cgit v1.2.3 From da48a65f3ff4155364fb9e3efe0bfba58291da6b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 4 Apr 2024 12:55:00 -0700 Subject: bnxt_en: Fix PTP firmware timeout parameter Use the correct tmo_us microsecond parameter for the PTP firmware timeout parameter. Fixes: 7de3c2218eed ("bnxt_en: Add a timeout parameter to bnxt_hwrm_port_ts_query()") Reported-by: Pavan Chebbi Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240404195500.171071-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 345aac4484ee..e661ab154d6b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -131,7 +131,7 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, if (!tmo_us) tmo_us = BNXT_PTP_QTS_TIMEOUT; tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); - req->ts_req_timeout = cpu_to_le16(txts_tmo); + req->ts_req_timeout = cpu_to_le16(tmo_us); } resp = hwrm_req_hold(bp, req); -- cgit v1.2.3 From d4383ce15f5bb0e889406961372d31199f3f6edc Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 4 Apr 2024 20:33:53 +0300 Subject: net/mlx5e: Extract checking of FEC support for a link mode The check of whether a given FEC mode is supported in a given link mode is about to get more complicated, so extract it in a separate function to avoid code duplication. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240404173357.123307-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index dbe2b19a9570..b4681a93807d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -308,6 +308,14 @@ enum mlx5e_fec_supported_link_mode { *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ } while (0) +/* Returns true if FEC can be set for a given link mode. 
*/ +static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, + enum mlx5e_fec_supported_link_mode link_mode) +{ + return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || + MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); +} + /* get/set FEC admin field for a given speed */ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, enum mlx5e_fec_supported_link_mode link_mode) @@ -389,7 +397,6 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) { - bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); @@ -407,7 +414,7 @@ bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { u16 fec_caps; - if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; mlx5e_get_fec_cap_field(out, &fec_caps, i); @@ -420,7 +427,6 @@ bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, u16 *fec_configured_mode) { - bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); @@ -445,7 +451,7 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, *fec_configured_mode = 0; for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { - if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); @@ -489,7 +495,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) u16 conf_fec = fec_policy; u16 fec_caps = 0; - if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 -- cgit v1.2.3 From 4aafb8ab2a626ac714931ec7894cf16df948b359 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 4 Apr 2024 20:33:54 +0300 Subject: net/mlx5e: Support FEC settings for 100G/lane modes This consists of: 1. Expose the 100G/lane capability bit in the PCAM reg. 2. Expose the per link mode FEC capability masks in the PPLM reg. 3. Set the overrides according to ethtool parameters. FEC for new modes is set if and only if the PCAM 100G/lane capability is advertised and the capability mask for a given link mode reports that it can accept the requested FEC mode. 
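For a sense of how this is exercised from user space (the interface name is a placeholder, and the encodings actually offered depend on the module, board and firmware), the per-link-mode overrides are still driven through the standard ethtool FEC interface; on a device advertising the fec_100G_per_lane_in_pplm capability, a request such as:

$ ethtool --show-fec ens1f0np0
$ ethtool --set-fec ens1f0np0 encoding rs

should now program the fec_override_admin_100g_1x/200g_2x/400g_4x/800g_8x fields added below for the new 100G-per-lane link modes as well.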
Signed-off-by: Cosmin Ratiu Reviewed-by: Gal Pressman Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240404173357.123307-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 36 +++++++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 20 +++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index b4681a93807d..b4efc780e297 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -292,10 +292,15 @@ enum mlx5e_fec_supported_link_mode { MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X, MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; #define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X +#define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X #define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ do { \ @@ -313,7 +318,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, enum mlx5e_fec_supported_link_mode link_mode) { return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || - MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + (link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) || + (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)); } /* get/set FEC admin field for a given speed */ @@ -348,6 +356,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x); + break; default: return -EINVAL; } @@ -389,6 +409,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x); + break; default: return -EINVAL; } @@ -501,7 +533,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 * to link modes up to 25G per lane and to * MLX5E_FEC_RS_544_514 in the new link modes based on - * 50 G per lane + * 50G or 100G per lane */ if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && i >= 
MLX5E_FEC_FIRST_50G_PER_LANE_MODE) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cc159d8563d1..35ffc9b9f241 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9817,7 +9817,21 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_100g_2x[0x10]; u8 fec_override_admin_50g_1x[0x10]; - u8 reserved_at_140[0x140]; + u8 fec_override_cap_800g_8x[0x10]; + u8 fec_override_cap_400g_4x[0x10]; + + u8 fec_override_cap_200g_2x[0x10]; + u8 fec_override_cap_100g_1x[0x10]; + + u8 reserved_at_180[0xa0]; + + u8 fec_override_admin_800g_8x[0x10]; + u8 fec_override_admin_400g_4x[0x10]; + + u8 fec_override_admin_200g_2x[0x10]; + u8 fec_override_admin_100g_1x[0x10]; + + u8 reserved_at_260[0x20]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -10189,7 +10203,9 @@ struct mlx5_ifc_mtutc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x68]; + u8 reserved_at_0[0x48]; + u8 fec_100G_per_lane_in_pplm[0x1]; + u8 reserved_at_49[0x1f]; u8 fec_50G_per_lane_in_pplm[0x1]; u8 reserved_at_69[0x4]; u8 rx_icrc_encapsulated_counter[0x1]; -- cgit v1.2.3 From 958f56e4838579544fbc5183073518c7c4d22d44 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 4 Apr 2024 20:33:57 +0300 Subject: net/mlx5e: Un-expose functions in en.h Un-expose functions that are not used outside of their c file. Make them static. Signed-off-by: Tariq Toukan Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20240404173357.123307-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 ------------ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 22 +++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 3 files changed, 12 insertions(+), 24 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f5a3ac40f6e3..2acd1ebb0888 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1143,7 +1143,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); -int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); @@ -1180,23 +1179,12 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack); -int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, - struct ethtool_link_ksettings *link_ksettings); -int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, - const struct ethtool_link_ksettings *link_ksettings); -int mlx5e_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh); -int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); -void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, - struct ethtool_pauseparam *pauseparam); -int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, - struct 
ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ static inline bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 69f6a6aa7c55..93a13a478c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -996,8 +996,8 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } -int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, - struct ethtool_link_ksettings *link_ksettings) +static int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; @@ -1167,8 +1167,8 @@ static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_suppo return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported; } -int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, - const struct ethtool_link_ksettings *link_ksettings) +static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_port_eth_proto eproto; @@ -1268,7 +1268,7 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 rss_context = rxfh->rss_context; @@ -1281,8 +1281,8 @@ int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) return err; } -int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); u32 *rss_context = &rxfh->rss_context; @@ -1411,8 +1411,8 @@ static void mlx5e_get_pause_stats(struct net_device *netdev, mlx5e_stats_pause_get(priv, pause_stats); } -void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, - struct ethtool_pauseparam *pauseparam) +static void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -1433,8 +1433,8 @@ static void mlx5e_get_pauseparam(struct net_device *netdev, mlx5e_ethtool_get_pauseparam(priv, pauseparam); } -int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, - struct ethtool_pauseparam *pauseparam) +static int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { struct mlx5_core_dev *mdev = priv->mdev; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 81e1c1e401f9..a0d3af96dcb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5562,7 +5562,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_ipsec_cleanup(priv); } -int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) { return mlx5e_refresh_tirs(priv, false, false); } -- cgit v1.2.3 From 17b35355c2c6a761eaf571d63dab7f06b62249fb Mon Sep 17 00:00:00 2001 
From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:23 +0200 Subject: 3c515: remove unused 'mtu' variable This has never been used since the start of the git history. When building with W=1, the unused variable produces a gcc warning: drivers/net/ethernet/3com/3c515.c:35:18: error: 'mtu' defined but not used [-Werror=unused-const-variable=] Just remove it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240403080702.3509288-6-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c515.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index ba3e7aa1a28f..4725a8cfd695 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -31,9 +31,6 @@ Setting to > 1512 effectively disables this feature. */ static int rx_copybreak = 200; -/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */ -static const int mtu = 1500; - /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ static int max_interrupt_work = 20; -- cgit v1.2.3 From 0ef416e045ad7083933b469b6ca7968d1cc0eb80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:46 +0200 Subject: net: xgbe: remove extraneous #ifdef checks When both ACPI and OF are disabled, xgbe_v1 is unused and causes a W=1 warning: drivers/net/ethernet/amd/xgbe/xgbe-platform.c:533:39: error: unused variable 'xgbe_v1' [-Werror,-Wunused-const-variable] static const struct xgbe_version_data xgbe_v1 = { There is no real point in trying to save a few bytes for the match tables, so just make them always visible. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240403080702.3509288-29-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 9131020d06af..7912b3b45148 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -538,7 +538,6 @@ static const struct xgbe_version_data xgbe_v1 = { .tx_tstamp_workaround = 1, }; -#ifdef CONFIG_ACPI static const struct acpi_device_id xgbe_acpi_match[] = { { .id = "AMDI8001", .driver_data = (kernel_ulong_t)&xgbe_v1 }, @@ -546,9 +545,7 @@ static const struct acpi_device_id xgbe_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match); -#endif -#ifdef CONFIG_OF static const struct of_device_id xgbe_of_match[] = { { .compatible = "amd,xgbe-seattle-v1a", .data = &xgbe_v1 }, @@ -556,7 +553,6 @@ static const struct of_device_id xgbe_of_match[] = { }; MODULE_DEVICE_TABLE(of, xgbe_of_match); -#endif static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops, xgbe_platform_suspend, xgbe_platform_resume); @@ -564,12 +560,8 @@ static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops, static struct platform_driver xgbe_driver = { .driver = { .name = XGBE_DRV_NAME, -#ifdef CONFIG_ACPI .acpi_match_table = xgbe_acpi_match, -#endif -#ifdef CONFIG_OF .of_match_table = xgbe_of_match, -#endif .pm = &xgbe_platform_pm_ops, }, .probe = xgbe_platform_probe, -- cgit v1.2.3 From adda5401807882f5575364442cd583462fc40ae4 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Wed, 3 Apr 2024 14:28:40 -0700 Subject: net/mlx5e: Introduce lost_cqe statistic counter for PTP Tx port timestamping CQ Track the number of times a 
CQE was expected to not be delivered on PTP Tx port timestamping CQ. A CQE is expected to not be delivered if a certain amount of time passes since the corresponding CQE containing the DMA timestamp information has arrived. Increment the late_cqe counter when such a CQE does manage to be delivered to the CQ. Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20240403212931.128541-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- .../networking/device_drivers/ethernet/mellanox/mlx5/counters.rst | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 1 + 4 files changed, 9 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst index f69ee1ebee01..5464cd9e2694 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst @@ -702,6 +702,12 @@ the software port. the device typically ensures not posting the CQE. - Error + * - `ptp_cq[i]_lost_cqe` + - Number of times a CQE is expected to not be delivered on the PTP + timestamping CQE by the device due to a time delta elapsing. If such a + CQE is somehow delivered, `ptp_cq[i]_late_cqe` is incremented. + - Error + .. [#ring_global] The corresponding ring and global counters do not share the same name (i.e. do not follow the common naming scheme). diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index d0af7271da34..afd654583b6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -169,6 +169,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, WARN_ON_ONCE(!pos->inuse); pos->inuse = false; list_del(&pos->entry); + ptpsq->cq_stats->lost_cqe++; } spin_unlock_bh(&cqe_list->tracker_list_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4f372cb2fc9a..3004778e253e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -2178,6 +2178,7 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, late_cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, lost_cqe) }, }; static const struct counter_desc ptp_rq_stats_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index b71e3fdf92c5..ccc67a471f68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -463,6 +463,7 @@ struct mlx5e_ptp_cq_stats { u64 abort; u64 abort_abs_diff_ns; u64 late_cqe; + u64 lost_cqe; }; struct mlx5e_rep_stats { -- cgit v1.2.3 From cd429012f078395c5bfa558e2e81e882b48023b0 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Wed, 3 Apr 2024 14:28:41 -0700 Subject: net/mlx5e: Introduce timestamps statistic counter for Tx DMA layer Count number of 
transmitted packets that were hardware timestamped at the device DMA layer. Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Link: https://lore.kernel.org/r/20240403212931.128541-4-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- .../networking/device_drivers/ethernet/mellanox/mlx5/counters.rst | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 6 ++++-- 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst index 5464cd9e2694..fed821ef9b09 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst @@ -300,6 +300,11 @@ the software port. in the beginning of the queue. This is a normal condition. - Informative + * - `tx[i]_timestamps` + - Transmitted packets that were hardware timestamped at the device's DMA + layer. + - Informative + * - `tx[i]_added_vlan_packets` - The number of packets sent where vlan tag insertion was offloaded to the hardware. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 3004778e253e..aa1de8bf0841 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -2066,6 +2066,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, timestamps) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS @@ -2218,6 +2219,7 @@ static const struct counter_desc qos_sq_stats_desc[] = { { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, timestamps) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, #ifdef CONFIG_MLX5_EN_TLS diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ccc67a471f68..b0090ad133aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -431,6 +431,7 @@ struct mlx5e_sq_stats { u64 stopped; u64 dropped; u64 recover; + u64 timestamps; /* dirtied @completion */ u64 cqes ____cacheline_aligned_in_smp; u64 wake; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2fa076b23fbe..09a592909101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -750,11 +750,13 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, u64 ts = get_cqe_ts(cqe); hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts); - if (sq->ptpsq) + if (sq->ptpsq) { mlx5e_skb_cb_hwtstamp_handler(skb, 
MLX5E_SKB_CB_CQE_HWTSTAMP, hwts.hwtstamp, sq->ptpsq->cq_stats); - else + } else { skb_tstamp_tx(skb, &hwts); + sq->stats->timestamps++; + } } napi_consume_skb(skb, napi_budget); -- cgit v1.2.3 From 3579032c08c1d313bb53e67e97e9ea77bed48014 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Wed, 3 Apr 2024 14:28:42 -0700 Subject: net/mlx5e: Implement ethtool hardware timestamping statistics Feed driver statistics counters related to hardware timestamping to standardized ethtool hardware timestamping statistics group. Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Link: https://lore.kernel.org/r/20240403212931.128541-5-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 9 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 45 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 + 3 files changed, 56 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 93a13a478c11..a4fe164a1c4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2387,6 +2387,14 @@ static void mlx5e_get_rmon_stats(struct net_device *netdev, mlx5e_stats_rmon_get(priv, rmon_stats, ranges); } +static void mlx5e_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_ts_get(priv, ts_stats); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | @@ -2436,5 +2444,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_eth_mac_stats = mlx5e_get_eth_mac_stats, .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, .get_rmon_stats = mlx5e_get_rmon_stats, + .get_ts_stats = mlx5e_get_ts_stats, .get_link_ext_stats = mlx5e_get_link_ext_stats }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index aa1de8bf0841..e211c41cec06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1172,6 +1172,51 @@ void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, *ranges = mlx5e_rmon_ranges; } +void mlx5e_stats_ts_get(struct mlx5e_priv *priv, + struct ethtool_ts_stats *ts_stats) +{ + int i, j; + + mutex_lock(&priv->state_lock); + + if (priv->tx_ptp_opened) { + struct mlx5e_ptp *ptp = priv->channels.ptp; + + ts_stats->pkts = 0; + ts_stats->err = 0; + ts_stats->lost = 0; + + /* Aggregate stats across all TCs */ + for (i = 0; i < ptp->num_tc; i++) { + struct mlx5e_ptp_cq_stats *stats = + ptp->ptpsq[i].cq_stats; + + ts_stats->pkts += stats->cqe; + ts_stats->err += stats->abort + stats->err_cqe + + stats->late_cqe; + ts_stats->lost += stats->lost_cqe; + } + } else { + /* DMA layer will always successfully timestamp packets. Other + * counters do not make sense for this layer. 
+ */ + ts_stats->pkts = 0; + + /* Aggregate stats across all SQs */ + for (j = 0; j < priv->channels.num; j++) { + struct mlx5e_channel *c = priv->channels.c[j]; + + for (i = 0; i < c->num_tc; i++) { + struct mlx5e_sq_stats *stats = c->sq[i].stats; + + ts_stats->pkts += stats->timestamps; + } + } + } + + mutex_unlock(&priv->state_lock); +} + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index b0090ad133aa..9cee4c9472e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -128,6 +128,8 @@ void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, struct ethtool_rmon_stats *rmon, const struct ethtool_rmon_hist_range **ranges); +void mlx5e_stats_ts_get(struct mlx5e_priv *priv, + struct ethtool_ts_stats *ts_stats); void mlx5e_get_link_ext_stats(struct net_device *dev, struct ethtool_link_ext_stats *stats); -- cgit v1.2.3 From a29689e60ed3e65463d6462390caad669d08a6b7 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 5 Apr 2024 13:18:31 +0200 Subject: net: handle HAS_IOPORT dependencies In a future patch HAS_IOPORT=n will disable inb()/outb() and friends at compile time. We thus need to add HAS_IOPORT as dependency for those drivers requiring them. For the DEFXX driver the use of I/O ports is optional and we only need to fence specific code paths. It also turns out that with HAS_IOPORT handled explicitly HAMRADIO does not need the !S390 dependency and successfully builds the bpqether driver. Acked-by: Marc Kleine-Budde Acked-by: Jakub Kicinski Acked-by: Maciej W. Rozycki Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 2 +- drivers/net/arcnet/Kconfig | 2 +- drivers/net/can/cc770/Kconfig | 1 + drivers/net/can/sja1000/Kconfig | 1 + drivers/net/ethernet/3com/Kconfig | 4 ++-- drivers/net/ethernet/8390/Kconfig | 6 +++--- drivers/net/ethernet/amd/Kconfig | 4 ++-- drivers/net/ethernet/fujitsu/Kconfig | 2 +- drivers/net/ethernet/intel/Kconfig | 2 +- drivers/net/ethernet/sis/Kconfig | 4 ++-- drivers/net/ethernet/smsc/Kconfig | 2 +- drivers/net/ethernet/ti/Kconfig | 2 +- drivers/net/ethernet/via/Kconfig | 1 + drivers/net/ethernet/xircom/Kconfig | 2 +- drivers/net/fddi/defxx.c | 2 +- drivers/net/hamradio/Kconfig | 6 +++--- drivers/net/wan/Kconfig | 2 +- net/ax25/Kconfig | 2 +- 18 files changed, 25 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 172d84e39129..f0663cf1f755 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -520,7 +520,7 @@ source "drivers/net/ipa/Kconfig" config NET_SB1000 tristate "General Instruments Surfboard 1000" - depends on PNP + depends on ISA && PNP help This is a driver for the General Instrument (also known as NextLevel) SURFboard 1000 internal diff --git a/drivers/net/arcnet/Kconfig b/drivers/net/arcnet/Kconfig index a51b9dab6d3a..d1d07a1d4fbc 100644 --- a/drivers/net/arcnet/Kconfig +++ b/drivers/net/arcnet/Kconfig @@ -4,7 +4,7 @@ # menuconfig ARCNET - depends on NETDEVICES && (ISA || PCI || PCMCIA) + depends on NETDEVICES && (ISA || PCI || PCMCIA) && HAS_IOPORT tristate "ARCnet support" help If you have a network card of this type, say Y and check out the diff --git a/drivers/net/can/cc770/Kconfig b/drivers/net/can/cc770/Kconfig index 9ef1359319f0..467ef19de1c1 100644 --- a/drivers/net/can/cc770/Kconfig +++ b/drivers/net/can/cc770/Kconfig @@ -7,6 +7,7 @@ if CAN_CC770 config CAN_CC770_ISA tristate "ISA Bus based legacy CC770 driver" + depends on ISA help This driver adds legacy support for CC770 and AN82527 chips connected to the ISA bus using I/O port, memory mapped or diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig index 4b2f9cb17fc3..01168db4c106 100644 --- a/drivers/net/can/sja1000/Kconfig +++ b/drivers/net/can/sja1000/Kconfig @@ -87,6 +87,7 @@ config CAN_PLX_PCI config CAN_SJA1000_ISA tristate "ISA Bus based legacy SJA1000 driver" + depends on ISA help This driver adds legacy support for SJA1000 chips connected to the ISA bus using I/O port, memory mapped or indirect access. diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 706bd59bf645..1fbab79e2be4 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -44,7 +44,7 @@ config 3C515 config PCMCIA_3C574 tristate "3Com 3c574 PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT help Say Y here if you intend to attach a 3Com 3c574 or compatible PCMCIA (PC-card) Fast Ethernet card to your computer. @@ -54,7 +54,7 @@ config PCMCIA_3C574 config PCMCIA_3C589 tristate "3Com 3c589 PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT help Say Y here if you intend to attach a 3Com 3c589 or compatible PCMCIA (PC-card) Ethernet card to your computer. 
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index a4130e643342..345f250781c6 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_8390 config PCMCIA_AXNET tristate "Asix AX88190 PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT help Say Y here if you intend to attach an Asix AX88190-based PCMCIA (PC-card) Fast Ethernet card to your computer. These cards are @@ -117,7 +117,7 @@ config NE2000 config NE2K_PCI tristate "PCI NE2000 and clones support (see help)" - depends on PCI + depends on PCI && HAS_IOPORT select CRC32 help This driver is for NE2000 compatible PCI cards. It will not work @@ -146,7 +146,7 @@ config APNE config PCMCIA_PCNET tristate "NE2000 compatible PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT select CRC32 help Say Y here if you intend to attach an NE2000 compatible PCMCIA diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index f8cc8925161c..b39c6f3e1eda 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -56,7 +56,7 @@ config LANCE config PCNET32 tristate "AMD PCnet32 PCI support" - depends on PCI + depends on PCI && HAS_IOPORT select CRC32 select MII help @@ -122,7 +122,7 @@ config MVME147_NET config PCMCIA_NMCLAN tristate "New Media PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT help Say Y here if you intend to attach a New Media Ethernet or LiveWire PCMCIA (PC-card) Ethernet card to your computer. diff --git a/drivers/net/ethernet/fujitsu/Kconfig b/drivers/net/ethernet/fujitsu/Kconfig index 0a1400cb410a..06a28bce5d27 100644 --- a/drivers/net/ethernet/fujitsu/Kconfig +++ b/drivers/net/ethernet/fujitsu/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_FUJITSU config PCMCIA_FMVJ18X tristate "Fujitsu FMV-J18x PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT select CRC32 help Say Y here if you intend to attach a Fujitsu FMV-J18x or compatible diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 639fbb12bd35..6e7901e12699 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -41,7 +41,7 @@ config E100 config E1000 tristate "Intel(R) PRO/1000 Gigabit Ethernet support" - depends on PCI + depends on PCI && HAS_IOPORT help This driver supports Intel(R) PRO/1000 gigabit ethernet family of adapters. 
For more information on how to identify your adapter, go diff --git a/drivers/net/ethernet/sis/Kconfig b/drivers/net/ethernet/sis/Kconfig index 775d76d9890e..7e498bdbca73 100644 --- a/drivers/net/ethernet/sis/Kconfig +++ b/drivers/net/ethernet/sis/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_SIS config SIS900 tristate "SiS 900/7016 PCI Fast Ethernet Adapter support" - depends on PCI + depends on PCI && HAS_IOPORT select CRC32 select MII help @@ -35,7 +35,7 @@ config SIS900 config SIS190 tristate "SiS190/SiS191 gigabit ethernet support" - depends on PCI + depends on PCI && HAS_IOPORT select CRC32 select MII help diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 5f22a8a4d27b..13ce9086a9ca 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -54,7 +54,7 @@ config SMC91X config PCMCIA_SMC91C92 tristate "SMC 91Cxx PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT select CRC32 select MII help diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 1530d13984d4..cfe9bd6b1d62 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -167,7 +167,7 @@ config TI_KEYSTONE_NETCP_ETHSS config TLAN tristate "TI ThunderLAN support" - depends on (PCI || EISA) + depends on (PCI || EISA) && HAS_IOPORT help If you have a PCI Ethernet network card based on the ThunderLAN chip which is supported by this driver, say Y here. diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig index da287ef65be7..00773f5e4d7e 100644 --- a/drivers/net/ethernet/via/Kconfig +++ b/drivers/net/ethernet/via/Kconfig @@ -20,6 +20,7 @@ config VIA_RHINE tristate "VIA Rhine support" depends on PCI || (OF_IRQ && GENERIC_PCI_IOMAP) depends on PCI || ARCH_VT8500 || COMPILE_TEST + depends on HAS_IOPORT depends on HAS_DMA select CRC32 select MII diff --git a/drivers/net/ethernet/xircom/Kconfig b/drivers/net/ethernet/xircom/Kconfig index 7497b9bea511..bfbdcf758afb 100644 --- a/drivers/net/ethernet/xircom/Kconfig +++ b/drivers/net/ethernet/xircom/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_XIRCOM config PCMCIA_XIRC2PS tristate "Xircom 16-bit PCMCIA support" - depends on PCMCIA + depends on PCMCIA && HAS_IOPORT help Say Y here if you intend to attach a Xircom 16-bit PCMCIA (PC-card) Ethernet or Fast Ethernet card to your computer. 
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 1fef8a9b1a0f..0fbbb7286008 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -254,7 +254,7 @@ static const char version[] = #define DFX_BUS_TC(dev) 0 #endif -#if defined(CONFIG_EISA) || defined(CONFIG_PCI) +#ifdef CONFIG_HAS_IOPORT #define dfx_use_mmio bp->mmio #else #define dfx_use_mmio true diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig index 25b1f929c422..36a9aade9f33 100644 --- a/drivers/net/hamradio/Kconfig +++ b/drivers/net/hamradio/Kconfig @@ -83,7 +83,7 @@ config SCC_TRXECHO config BAYCOM_SER_FDX tristate "BAYCOM ser12 fullduplex driver for AX.25" - depends on AX25 && !S390 + depends on AX25 && HAS_IOPORT select CRC_CCITT help This is one of two drivers for Baycom style simple amateur radio @@ -103,7 +103,7 @@ config BAYCOM_SER_FDX config BAYCOM_SER_HDX tristate "BAYCOM ser12 halfduplex driver for AX.25" - depends on AX25 && !S390 + depends on AX25 && HAS_IOPORT select CRC_CCITT help This is one of two drivers for Baycom style simple amateur radio @@ -150,7 +150,7 @@ config BAYCOM_EPP config YAM tristate "YAM driver for AX.25" - depends on AX25 && !S390 + depends on AX25 && HAS_IOPORT help The YAM is a modem for packet radio which connects to the serial port and includes some of the functions of a Terminal Node diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 31ab2136cdf1..67be9857c86c 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -180,7 +180,7 @@ config C101 config FARSYNC tristate "FarSync T-Series support" - depends on HDLC && PCI + depends on HDLC && PCI && HAS_IOPORT help Support for the FarSync T-Series X.21 (and V.35/V.24) cards by FarSite Communications Ltd. diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig index fdb666607f10..e23a3dc14b93 100644 --- a/net/ax25/Kconfig +++ b/net/ax25/Kconfig @@ -4,7 +4,7 @@ # menuconfig HAMRADIO - depends on NET && !S390 + depends on NET bool "Amateur Radio support" help If you want to connect your Linux box to an amateur radio, answer Y -- cgit v1.2.3 From ec20b283009346adc66d5a460b1f8fb5adafbcfe Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 5 Apr 2024 22:05:00 +0200 Subject: ipv4: Set scope explicitly in ip_route_output(). Add a "scope" parameter to ip_route_output() so that callers don't have to override the tos parameter with the RTO_ONLINK flag if they want a local scope. This will allow converting flowi4_tos to dscp_t in the future, thus allowing static analysers to flag invalid interactions between "tos" (the DSCP bits) and ECN. Only three users ask for local scope (bonding, arp and atm). The others continue to use RT_SCOPE_UNIVERSE. While there, add a comment to warn users about the limitations of ip_route_output(). Signed-off-by: Guillaume Nault Acked-by: Leon Romanovsky # infiniband Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/irdma/cm.c | 3 ++- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 3 ++- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/ethernet/broadcom/cnic.c | 3 ++- include/net/route.h | 9 ++++++++- net/atm/clip.c | 2 +- net/bridge/br_netfilter_hooks.c | 3 ++- net/ipv4/arp.c | 9 ++++++--- net/ipv4/igmp.c | 3 ++- net/mpls/af_mpls.c | 2 +- 10 files changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 1ee7a4e0d8d8..36bb7e5ce638 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1985,7 +1985,8 @@ static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip, __be32 dst_ipaddr = htonl(dst_ip); __be32 src_ipaddr = htonl(src_ip); - rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0, + RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) { ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index a51fc6854984..259303b9907c 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -447,7 +447,8 @@ qedr_addr4_resolve(struct qedr_dev *dev, struct rtable *rt = NULL; int rc = 0; - rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0); + rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0, + RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) { DP_ERR(dev, "ip_route_output returned error\n"); return -EINVAL; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2c5ed0a7cb18..c9f0415f780a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3014,8 +3014,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) tags = NULL; /* Find out through which dev should the packet go */ - rt = ip_route_output(dev_net(bond->dev), targets[i], 0, - RTO_ONLINK, 0); + rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0, + RT_SCOPE_LINK); if (IS_ERR(rt)) { /* there's no route to target - try to send arp * probe to generate any traffic (arp_validate=0) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 3d63177e7e52..c2b4188a1ef1 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -3682,7 +3682,8 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr, #if defined(CONFIG_INET) struct rtable *rt; - rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0); + rt = ip_route_output(&init_net, dst_addr->sin_addr.s_addr, 0, 0, 0, + RT_SCOPE_UNIVERSE); if (!IS_ERR(rt)) { *dst = &rt->dst; return 0; diff --git a/include/net/route.h b/include/net/route.h index d4a0147942f1..315a8acee6c6 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -141,15 +141,22 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 return ip_route_output_flow(net, flp, NULL); } +/* Simplistic IPv4 route lookup function. + * This is only suitable for some particular use cases: since the flowi4 + * structure is only partially set, it may bypass some fib-rules. 
+ */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, - __be32 saddr, u8 tos, int oif) + __be32 saddr, u8 tos, int oif, + __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, .flowi4_tos = tos, + .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, }; + return ip_route_output_key(net, &fl4); } diff --git a/net/atm/clip.c b/net/atm/clip.c index 294cb9efe3d3..362e8d25a79e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -463,7 +463,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - rt = ip_route_output(&init_net, ip, 0, 1, 0); + rt = ip_route_output(&init_net, ip, 0, 0, 0, RT_SCOPE_LINK); if (IS_ERR(rt)) return PTR_ERR(rt); neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 35e10c5a766d..4242447be322 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -399,7 +399,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ goto free_skb; rt = ip_route_output(net, iph->daddr, 0, - RT_TOS(iph->tos), 0); + RT_TOS(iph->tos), 0, + RT_SCOPE_UNIVERSE); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 0d0d725b46ad..ab82ca104496 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) /*unsigned long now; */ struct net *net = dev_net(dev); - rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev)); + rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev), + RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { @@ -1056,7 +1057,8 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (!dev) { - struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); + struct rtable *rt = ip_route_output(net, ip, 0, 0, 0, + RT_SCOPE_LINK); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1188,7 +1190,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (!dev) { - struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); + struct rtable *rt = ip_route_output(net, ip, 0, 0, 0, + RT_SCOPE_LINK); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 717e97a389a8..9bf09de6a2e7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) if (!dev) { struct rtable *rt = ip_route_output(net, imr->imr_multiaddr.s_addr, - 0, 0, 0); + 0, 0, 0, + RT_SCOPE_UNIVERSE); if (!IS_ERR(rt)) { dev = rt->dst.dev; ip_rt_put(rt); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6dab883a08dd..1303acb9cdd2 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -594,7 +594,7 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, struct in_addr daddr; memcpy(&daddr, addr, sizeof(struct in_addr)); - rt = ip_route_output(net, daddr.s_addr, 0, 0, 0); + rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE); if (IS_ERR(rt)) return ERR_CAST(rt); -- cgit v1.2.3 From ef460a8986fa0dae1cdcb158a06127f7af27c92d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 6 Apr 2024 15:16:00 -0500 Subject: net: lan743x: Fixup EEE The enabling/disabling of EEE in the MAC should happen as a result of 
auto negotiation. So move the enable/disable into lan743x_phy_link_status_change() which gets called by phylib when there is a change in link status. lan743x_ethtool_set_eee() now just programs the hardware with the LTI timer value, and passed everything else to phylib, so it can correctly setup the PHY. lan743x_ethtool_get_eee() relies on phylib doing most of the work, the MAC driver just adds the LTI timer value. Signed-off-by: Andrew Lunn Reviewed-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_ethtool.c | 21 --------------------- drivers/net/ethernet/microchip/lan743x_main.c | 7 +++++++ 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 8a6ae171e375..d0f4ff4ee075 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1076,15 +1076,10 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev, buf = lan743x_csr_read(adapter, MAC_CR); if (buf & MAC_CR_EEE_EN_) { - eee->eee_enabled = true; - eee->tx_lpi_enabled = true; /* EEE_TX_LPI_REQ_DLY & tx_lpi_timer are same uSec unit */ buf = lan743x_csr_read(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT); eee->tx_lpi_timer = buf; } else { - eee->eee_enabled = false; - eee->eee_active = false; - eee->tx_lpi_enabled = false; eee->tx_lpi_timer = 0; } @@ -1097,7 +1092,6 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev, struct lan743x_adapter *adapter; struct phy_device *phydev; u32 buf = 0; - int ret = 0; if (!netdev) return -EINVAL; @@ -1114,23 +1108,8 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev, } if (eee->eee_enabled) { - ret = phy_init_eee(phydev, false); - if (ret) { - netif_err(adapter, drv, adapter->netdev, - "EEE initialization failed\n"); - return ret; - } - buf = (u32)eee->tx_lpi_timer; lan743x_csr_write(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT, buf); - - buf = lan743x_csr_read(adapter, MAC_CR); - buf |= MAC_CR_EEE_EN_; - lan743x_csr_write(adapter, MAC_CR, buf); - } else { - buf = lan743x_csr_read(adapter, MAC_CR); - buf &= ~MAC_CR_EEE_EN_; - lan743x_csr_write(adapter, MAC_CR, buf); } return phy_ethtool_set_eee(phydev, eee); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 75a988c0bd79..d37a49cd5c69 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1462,6 +1462,13 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) phydev->interface == PHY_INTERFACE_MODE_1000BASEX || phydev->interface == PHY_INTERFACE_MODE_2500BASEX) lan743x_sgmii_config(adapter); + + data = lan743x_csr_read(adapter, MAC_CR); + if (phydev->enable_tx_lpi) + data |= MAC_CR_EEE_EN_; + else + data &= ~MAC_CR_EEE_EN_; + lan743x_csr_write(adapter, MAC_CR, data); } } -- cgit v1.2.3 From 93197c7c509d0c872de8b4fa17b44db175633f5c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 6 Apr 2024 04:05:38 +0300 Subject: mlx5/core: Support max_io_eqs for a function Implement get and set for the maximum IO event queues for SF and VF. This enables administrator on the hypervisor to control the maximum IO event queues which are typically used to derive the maximum and default number of net device channels or rdma device completion vectors. Reviewed-by: Shay Drory Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 97 ++++++++++++++++++++++ 3 files changed, 108 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index d8e739cbcbce..f8869c9b6802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -98,6 +98,8 @@ static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = { .port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get, .port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set, #endif /* CONFIG_XFRM_OFFLOAD */ + .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get, + .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set, }; static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw, @@ -143,6 +145,8 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, #endif + .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get, + .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set, }; int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 349e28a6dd8d..50ce1ea20dd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_en int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable, struct netlink_ext_ack *extack); #endif /* CONFIG_XFRM_OFFLOAD */ +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, + u32 *max_io_eqs, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, + u32 max_io_eqs, + struct netlink_ext_ack *extack); + void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index baaae628b0a0..20927f65ac2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -66,6 +66,8 @@ #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) +#define MLX5_ESW_MAX_CTRL_EQS 4 + static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { .max_fte = MLX5_ESW_VPORT_TBL_SIZE, .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@ -4557,3 +4559,98 @@ unlock: return err; } #endif /* CONFIG_XFRM_OFFLOAD */ + +int +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs, + struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + u16 vport_num = vport->vport; + struct mlx5_eswitch *esw; + void *query_ctx; + void *hca_caps; + u32 max_eqs; + int err; + + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support VHCA 
management"); + return -EOPNOTSUPP; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + mutex_lock(&esw->state_lock); + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs); + if (max_eqs < MLX5_ESW_MAX_CTRL_EQS) + *max_io_eqs = 0; + else + *max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS; +out: + mutex_unlock(&esw->state_lock); + kfree(query_ctx); + return err; +} + +int +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, + struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + u16 vport_num = vport->vport; + struct mlx5_eswitch *esw; + void *query_ctx; + void *hca_caps; + u16 max_eqs; + int err; + + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) { + NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range"); + return -EINVAL; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + mutex_lock(&esw->state_lock); + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps"); + +out: + mutex_unlock(&esw->state_lock); + kfree(query_ctx); + return err; +} -- cgit v1.2.3 From 39f59c72ad3a1eaab9a60f0671bc94d2bc826d21 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 7 Apr 2024 23:19:25 +0200 Subject: r8169: add support for RTL8168M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A user reported an unknown chip version. According to the r8168 vendor driver it's called RTL8168M, but handling is identical to RTL8168H. So let's simply treat it as RTL8168H. Tested-by: Евгений Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6f1e6f386b7b..60540ca63464 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2227,6 +2227,8 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) * the wild. Let's disable detection. * { 0x7cf, 0x540, RTL_GIGA_MAC_VER_45 }, */ + /* Realtek calls it RTL8168M, but it's handled like RTL8168H */ + { 0x7cf, 0x6c0, RTL_GIGA_MAC_VER_46 }, /* 8168G family. 
*/ { 0x7cf, 0x5c8, RTL_GIGA_MAC_VER_44 }, -- cgit v1.2.3 From b2c289415b2b2ef112b78d5e73b4acecf5db409e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2024 17:29:49 -0500 Subject: e1000e: Remove redundant runtime resume for ethtool_ops e60b22c5b7e5 ("e1000e: fix accessing to suspended device") added ethtool_ops.begin() and .complete(), which used pm_runtime_get_sync() to resume suspended devices before any ethtool_ops callback and allow suspend after it completed. 3ef672ab1862 ("e1000e: ethtool unnecessarily takes device out of RPM suspend") removed ethtool_ops.begin() and .complete() and instead did pm_runtime_get_sync() only in the individual ethtool_ops callbacks that access device registers. Subsequently, f32a21376573 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops") added pm_runtime_get_sync() in the dev_ethtool() path, so the device is resumed before *any* ethtool_ops callback, as it was before 3ef672ab1862. Remove most runtime resumes from ethtool_ops, which are now redundant because the resume has already been done by dev_ethtool(). This is essentially a revert of 3ef672ab1862 ("e1000e: ethtool unnecessarily takes device out of RPM suspend"). There are a couple subtleties: - Prior to 3ef672ab1862, the device was resumed only for the duration of a single ethtool callback. 3ef672ab1862 changed e1000_set_phys_id() so the device was resumed for ETHTOOL_ID_ACTIVE and remained resumed until a subsequent callback for ETHTOOL_ID_INACTIVE. Preserve that part of 3ef672ab1862 so the device will not be runtime suspended while in the ETHTOOL_ID_ACTIVE state. - 3ef672ab1862 added "if (!pm_runtime_suspended())" in before reading the STATUS register in e1000_get_settings(). This was racy and is now unnecessary because dev_ethtool() has resumed the device already, so revert that. Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ethtool.c | 62 +++-------------------------- 1 file changed, 6 insertions(+), 56 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index dc553c51d79a..85da20778e0f 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -156,7 +156,7 @@ static int e1000_get_link_ksettings(struct net_device *netdev, speed = adapter->link_speed; cmd->base.duplex = adapter->link_duplex - 1; } - } else if (!pm_runtime_suspended(netdev->dev.parent)) { + } else { u32 status = er32(STATUS); if (status & E1000_STATUS_LU) { @@ -274,16 +274,13 @@ static int e1000_set_link_ksettings(struct net_device *netdev, ethtool_convert_link_mode_to_legacy_u32(&advertising, cmd->link_modes.advertising); - pm_runtime_get_sync(netdev->dev.parent); - /* When SoL/IDER sessions are active, autoneg/speed/duplex * cannot be changed */ if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) { e_err("Cannot change link characteristics when SoL/IDER is active.\n"); - ret_val = -EINVAL; - goto out; + return -EINVAL; } /* MDI setting is only allowed when autoneg enabled because @@ -291,16 +288,13 @@ static int e1000_set_link_ksettings(struct net_device *netdev, * duplex is forced. 
*/ if (cmd->base.eth_tp_mdix_ctrl) { - if (hw->phy.media_type != e1000_media_type_copper) { - ret_val = -EOPNOTSUPP; - goto out; - } + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && (cmd->base.autoneg != AUTONEG_ENABLE)) { e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); - ret_val = -EINVAL; - goto out; + return -EINVAL; } } @@ -347,7 +341,6 @@ static int e1000_set_link_ksettings(struct net_device *netdev, } out: - pm_runtime_put_sync(netdev->dev.parent); clear_bit(__E1000_RESETTING, &adapter->state); return ret_val; } @@ -383,8 +376,6 @@ static int e1000_set_pauseparam(struct net_device *netdev, while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) usleep_range(1000, 2000); - pm_runtime_get_sync(netdev->dev.parent); - if (adapter->fc_autoneg == AUTONEG_ENABLE) { hw->fc.requested_mode = e1000_fc_default; if (netif_running(adapter->netdev)) { @@ -417,7 +408,6 @@ static int e1000_set_pauseparam(struct net_device *netdev, } out: - pm_runtime_put_sync(netdev->dev.parent); clear_bit(__E1000_RESETTING, &adapter->state); return retval; } @@ -448,8 +438,6 @@ static void e1000_get_regs(struct net_device *netdev, u32 *regs_buff = p; u16 phy_data; - pm_runtime_get_sync(netdev->dev.parent); - memset(p, 0, E1000_REGS_LEN * sizeof(u32)); regs->version = (1u << 24) | @@ -495,8 +483,6 @@ static void e1000_get_regs(struct net_device *netdev, e1e_rphy(hw, MII_STAT1000, &phy_data); regs_buff[24] = (u32)phy_data; /* phy local receiver status */ regs_buff[25] = regs_buff[24]; /* phy remote receiver status */ - - pm_runtime_put_sync(netdev->dev.parent); } static int e1000_get_eeprom_len(struct net_device *netdev) @@ -529,8 +515,6 @@ static int e1000_get_eeprom(struct net_device *netdev, if (!eeprom_buff) return -ENOMEM; - pm_runtime_get_sync(netdev->dev.parent); - if (hw->nvm.type == e1000_nvm_eeprom_spi) { ret_val = e1000_read_nvm(hw, first_word, last_word - first_word + 1, @@ -544,8 +528,6 @@ static int e1000_get_eeprom(struct net_device *netdev, } } - pm_runtime_put_sync(netdev->dev.parent); - if (ret_val) { /* a read error occurred, throw away the result */ memset(eeprom_buff, 0xff, sizeof(u16) * @@ -595,8 +577,6 @@ static int e1000_set_eeprom(struct net_device *netdev, ptr = (void *)eeprom_buff; - pm_runtime_get_sync(netdev->dev.parent); - if (eeprom->offset & 1) { /* need read/modify/write of first changed EEPROM word */ /* only the second byte of the word is being modified */ @@ -637,7 +617,6 @@ static int e1000_set_eeprom(struct net_device *netdev, ret_val = e1000e_update_nvm_checksum(hw); out: - pm_runtime_put_sync(netdev->dev.parent); kfree(eeprom_buff); return ret_val; } @@ -733,8 +712,6 @@ static int e1000_set_ringparam(struct net_device *netdev, } } - pm_runtime_get_sync(netdev->dev.parent); - e1000e_down(adapter, true); /* We can't just free everything and then setup again, because the @@ -773,7 +750,6 @@ err_setup_rx: e1000e_free_tx_resources(temp_tx); err_setup: e1000e_up(adapter); - pm_runtime_put_sync(netdev->dev.parent); free_temp: vfree(temp_tx); vfree(temp_rx); @@ -1816,8 +1792,6 @@ static void e1000_diag_test(struct net_device *netdev, u8 autoneg; bool if_running = netif_running(netdev); - pm_runtime_get_sync(netdev->dev.parent); - set_bit(__E1000_TESTING, &adapter->state); if (!if_running) { @@ -1903,8 +1877,6 @@ static void e1000_diag_test(struct net_device *netdev, } msleep_interruptible(4 * 1000); - - pm_runtime_put_sync(netdev->dev.parent); } static void 
e1000_get_wol(struct net_device *netdev, @@ -2046,15 +2018,11 @@ static int e1000_set_coalesce(struct net_device *netdev, adapter->itr_setting = adapter->itr & ~3; } - pm_runtime_get_sync(netdev->dev.parent); - if (adapter->itr_setting != 0) e1000e_write_itr(adapter, adapter->itr); else e1000e_write_itr(adapter, 0); - pm_runtime_put_sync(netdev->dev.parent); - return 0; } @@ -2068,9 +2036,7 @@ static int e1000_nway_reset(struct net_device *netdev) if (!adapter->hw.mac.autoneg) return -EINVAL; - pm_runtime_get_sync(netdev->dev.parent); e1000e_reinit_locked(adapter); - pm_runtime_put_sync(netdev->dev.parent); return 0; } @@ -2084,12 +2050,8 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, int i; char *p = NULL; - pm_runtime_get_sync(netdev->dev.parent); - dev_get_stats(netdev, &net_stats); - pm_runtime_put_sync(netdev->dev.parent); - for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { switch (e1000_gstrings_stats[i].type) { case NETDEV_STATS: @@ -2146,9 +2108,7 @@ static int e1000_get_rxnfc(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; u32 mrqc; - pm_runtime_get_sync(netdev->dev.parent); mrqc = er32(MRQC); - pm_runtime_put_sync(netdev->dev.parent); if (!(mrqc & E1000_MRQC_RSS_FIELD_MASK)) return 0; @@ -2211,13 +2171,9 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) return -EOPNOTSUPP; } - pm_runtime_get_sync(netdev->dev.parent); - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) { - pm_runtime_put_sync(netdev->dev.parent); + if (ret_val) return -EBUSY; - } /* EEE Capability */ ret_val = e1000_read_emi_reg_locked(hw, cap_addr, &phy_data); @@ -2257,8 +2213,6 @@ release: if (ret_val) ret_val = -ENODATA; - pm_runtime_put_sync(netdev->dev.parent); - return ret_val; } @@ -2299,16 +2253,12 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled; - pm_runtime_get_sync(netdev->dev.parent); - /* reset the link */ if (netif_running(netdev)) e1000e_reinit_locked(adapter); else e1000e_reset(adapter); - pm_runtime_put_sync(netdev->dev.parent); - return 0; } -- cgit v1.2.3 From 461359c4f3700b4bc4da0c3358be423296ee6561 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2024 17:29:50 -0500 Subject: igb: Remove redundant runtime resume for ethtool_ops 749ab2cd1270 ("igb: add basic runtime PM support") added ethtool_ops.begin() and .complete(), which used pm_runtime_get_sync() to resume suspended devices before any ethtool_ops callback and allow suspend after it completed. Subsequently, f32a21376573 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops") added pm_runtime_get_sync() in the dev_ethtool() path, so the device is resumed before any ethtool_ops callback even if the driver didn't supply a .begin() callback. Remove the .begin() and .complete() callbacks, which are now redundant because dev_ethtool() already resumes the device. 
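For context, the bracketing that dev_ethtool() has performed around every ethtool_ops callback since f32a21376573 looks roughly like the sketch below. This is a simplified illustration only: the real code lives in net/ethtool/ioctl.c and is structured differently, and the ethtool_dispatch_op() helper shown here is hypothetical.

/* Simplified sketch (assumed shape, not the exact upstream code) of the
 * runtime-PM bracketing done by dev_ethtool(): the netdev's parent device
 * is resumed before any ethtool_ops callback runs and allowed to suspend
 * again afterwards, which is what makes per-driver .begin()/.complete()
 * callbacks that only call pm_runtime_get_sync()/pm_runtime_put() redundant.
 */
#include <linux/netdevice.h>
#include <linux/pm_runtime.h>

static int ethtool_dispatch_op(struct net_device *dev,
			       int (*op)(struct net_device *dev))
{
	struct device *parent = dev->dev.parent;
	int rc;

	if (parent)
		pm_runtime_get_sync(parent);	/* resume before the callback */

	rc = op(dev);				/* e.g. any ethtool_ops handler */

	if (parent)
		pm_runtime_put(parent);		/* allow suspend again afterwards */

	return rc;
}
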
Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 99977a22b843..61d72250c0ed 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3272,19 +3272,6 @@ static int igb_get_module_eeprom(struct net_device *netdev, return 0; } -static int igb_ethtool_begin(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - pm_runtime_get_sync(&adapter->pdev->dev); - return 0; -} - -static void igb_ethtool_complete(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - pm_runtime_put(&adapter->pdev->dev); -} - static u32 igb_get_rxfh_indir_size(struct net_device *netdev) { return IGB_RETA_SIZE; @@ -3508,8 +3495,6 @@ static const struct ethtool_ops igb_ethtool_ops = { .set_channels = igb_set_channels, .get_priv_flags = igb_get_priv_flags, .set_priv_flags = igb_set_priv_flags, - .begin = igb_ethtool_begin, - .complete = igb_ethtool_complete, .get_link_ksettings = igb_get_link_ksettings, .set_link_ksettings = igb_set_link_ksettings, }; -- cgit v1.2.3 From 75f16e06dfb8035a89548ca33d26b362c934acc9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2024 17:29:51 -0500 Subject: igc: Remove redundant runtime resume for ethtool ops 8c5ad0dae93c ("igc: Add ethtool support") added ethtool_ops.begin() and .complete(), which used pm_runtime_get_sync() to resume suspended devices before any ethtool_ops callback and allow suspend after it completed. Subsequently, f32a21376573 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops") added pm_runtime_get_sync() in the dev_ethtool() path, so the device is resumed before any ethtool_ops callback even if the driver didn't supply a .begin() callback. Remove the .begin() and .complete() callbacks, which are now redundant because dev_ethtool() already resumes the device. 
Signed-off-by: Bjorn Helgaas Reviewed-by: Simon Horman Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 1a64f1ca6ca8..f2c4f1966bb0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1711,21 +1711,6 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return 0; } -static int igc_ethtool_begin(struct net_device *netdev) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - - pm_runtime_get_sync(&adapter->pdev->dev); - return 0; -} - -static void igc_ethtool_complete(struct net_device *netdev) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - - pm_runtime_put(&adapter->pdev->dev); -} - static int igc_ethtool_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -2025,8 +2010,6 @@ static const struct ethtool_ops igc_ethtool_ops = { .set_priv_flags = igc_ethtool_set_priv_flags, .get_eee = igc_ethtool_get_eee, .set_eee = igc_ethtool_set_eee, - .begin = igc_ethtool_begin, - .complete = igc_ethtool_complete, .get_link_ksettings = igc_ethtool_get_link_ksettings, .set_link_ksettings = igc_ethtool_set_link_ksettings, .self_test = igc_ethtool_diag_test, -- cgit v1.2.3 From e2dc7bfd677fc454668eb5bf0eab74ea35691040 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:13 +0100 Subject: net: ti: icssg-prueth: Move common functions into a separate file In order to allow code sharing between Silicon Revisions 1.0 and 2.0 move all functions that can be shared into a common file. This commit introduces no functional changes. 
Signed-off-by: Diogo Ivo Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/Makefile | 1 + drivers/net/ethernet/ti/icssg/icssg_common.c | 1198 ++++++++++++++++++++++++++ drivers/net/ethernet/ti/icssg/icssg_prueth.c | 1183 ------------------------- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 59 ++ 4 files changed, 1258 insertions(+), 1183 deletions(-) create mode 100644 drivers/net/ethernet/ti/icssg/icssg_common.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index d8590304f3df..4876f20aa495 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o obj-$(CONFIG_TI_ICSSG_PRUETH) += icssg-prueth.o icssg-prueth-y := icssg/icssg_prueth.o \ + icssg/icssg_common.o \ icssg/icssg_classifier.o \ icssg/icssg_queues.o \ icssg/icssg_config.o \ diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c new file mode 100644 index 000000000000..99f27ecc9352 --- /dev/null +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -0,0 +1,1198 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Texas Instruments ICSSG Ethernet Driver + * + * Copyright (C) 2018-2022 Texas Instruments Incorporated - https://www.ti.com/ + * Copyright (C) Siemens AG, 2024 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "icssg_prueth.h" +#include "../k3-cppi-desc-pool.h" + +/* Netif debug messages possible */ +#define PRUETH_EMAC_DEBUG (NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | \ + NETIF_MSG_IFDOWN | \ + NETIF_MSG_IFUP | \ + NETIF_MSG_RX_ERR | \ + NETIF_MSG_TX_ERR | \ + NETIF_MSG_TX_QUEUED | \ + NETIF_MSG_INTR | \ + NETIF_MSG_TX_DONE | \ + NETIF_MSG_RX_STATUS | \ + NETIF_MSG_PKTDATA | \ + NETIF_MSG_HW | \ + NETIF_MSG_WOL) + +#define prueth_napi_to_emac(napi) container_of(napi, struct prueth_emac, napi_rx) + +void prueth_cleanup_rx_chns(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + int max_rflows) +{ + if (rx_chn->desc_pool) + k3_cppi_desc_pool_destroy(rx_chn->desc_pool); + + if (rx_chn->rx_chn) + k3_udma_glue_release_rx_chn(rx_chn->rx_chn); +} + +void prueth_cleanup_tx_chns(struct prueth_emac *emac) +{ + int i; + + for (i = 0; i < emac->tx_ch_num; i++) { + struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; + + if (tx_chn->desc_pool) + k3_cppi_desc_pool_destroy(tx_chn->desc_pool); + + if (tx_chn->tx_chn) + k3_udma_glue_release_tx_chn(tx_chn->tx_chn); + + /* Assume prueth_cleanup_tx_chns() is called at the + * end after all channel resources are freed + */ + memset(tx_chn, 0, sizeof(*tx_chn)); + } +} + +void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num) +{ + int i; + + for (i = 0; i < num; i++) { + struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; + + if (tx_chn->irq) + free_irq(tx_chn->irq, tx_chn); + netif_napi_del(&tx_chn->napi_tx); + } +} + +void prueth_xmit_free(struct prueth_tx_chn *tx_chn, + struct cppi5_host_desc_t *desc) +{ + struct cppi5_host_desc_t *first_desc, *next_desc; + dma_addr_t buf_dma, next_desc_dma; + u32 buf_dma_len; + + first_desc = desc; + next_desc = first_desc; + + cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); + 
k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + while (next_desc_dma) { + next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + next_desc_dma); + cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + + k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); + } + + k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); +} + +int emac_tx_complete_packets(struct prueth_emac *emac, int chn, + int budget) +{ + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_tx; + struct netdev_queue *netif_txq; + struct prueth_tx_chn *tx_chn; + unsigned int total_bytes = 0; + struct sk_buff *skb; + dma_addr_t desc_dma; + int res, num_tx = 0; + void **swdata; + + tx_chn = &emac->tx_chns[chn]; + + while (true) { + res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); + if (res == -ENODATA) + break; + + /* teardown completion */ + if (cppi5_desc_is_tdcm(desc_dma)) { + if (atomic_dec_and_test(&emac->tdown_cnt)) + complete(&emac->tdown_complete); + break; + } + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + + skb = *(swdata); + prueth_xmit_free(tx_chn, desc_tx); + + ndev = skb->dev; + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += skb->len; + total_bytes += skb->len; + napi_consume_skb(skb, budget); + num_tx++; + } + + if (!num_tx) + return 0; + + netif_txq = netdev_get_tx_queue(ndev, chn); + netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); + + if (netif_tx_queue_stopped(netif_txq)) { + /* If the TX queue was stopped, wake it now + * if we have enough room. 
+ */ + __netif_tx_lock(netif_txq, smp_processor_id()); + if (netif_running(ndev) && + (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= + MAX_SKB_FRAGS)) + netif_tx_wake_queue(netif_txq); + __netif_tx_unlock(netif_txq); + } + + return num_tx; +} + +static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) +{ + struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx); + struct prueth_emac *emac = tx_chn->emac; + int num_tx_packets; + + num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget); + + if (num_tx_packets >= budget) + return budget; + + if (napi_complete_done(napi_tx, num_tx_packets)) + enable_irq(tx_chn->irq); + + return num_tx_packets; +} + +static irqreturn_t prueth_tx_irq(int irq, void *dev_id) +{ + struct prueth_tx_chn *tx_chn = dev_id; + + disable_irq_nosync(irq); + napi_schedule(&tx_chn->napi_tx); + + return IRQ_HANDLED; +} + +int prueth_ndev_add_tx_napi(struct prueth_emac *emac) +{ + struct prueth *prueth = emac->prueth; + int i, ret; + + for (i = 0; i < emac->tx_ch_num; i++) { + struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; + + netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll); + ret = request_irq(tx_chn->irq, prueth_tx_irq, + IRQF_TRIGGER_HIGH, tx_chn->name, + tx_chn); + if (ret) { + netif_napi_del(&tx_chn->napi_tx); + dev_err(prueth->dev, "unable to request TX IRQ %d\n", + tx_chn->irq); + goto fail; + } + } + + return 0; +fail: + prueth_ndev_del_tx_napi(emac, i); + return ret; +} + +int prueth_init_tx_chns(struct prueth_emac *emac) +{ + static const struct k3_ring_cfg ring_cfg = { + .elm_size = K3_RINGACC_RING_ELSIZE_8, + .mode = K3_RINGACC_RING_MODE_RING, + .flags = 0, + .size = PRUETH_MAX_TX_DESC, + }; + struct k3_udma_glue_tx_channel_cfg tx_cfg; + struct device *dev = emac->prueth->dev; + struct net_device *ndev = emac->ndev; + int ret, slice, i; + u32 hdesc_size; + + slice = prueth_emac_slice(emac); + if (slice < 0) + return slice; + + init_completion(&emac->tdown_complete); + + hdesc_size = cppi5_hdesc_calc_size(true, PRUETH_NAV_PS_DATA_SIZE, + PRUETH_NAV_SW_DATA_SIZE); + memset(&tx_cfg, 0, sizeof(tx_cfg)); + tx_cfg.swdata_size = PRUETH_NAV_SW_DATA_SIZE; + tx_cfg.tx_cfg = ring_cfg; + tx_cfg.txcq_cfg = ring_cfg; + + for (i = 0; i < emac->tx_ch_num; i++) { + struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; + + /* To differentiate channels for SLICE0 vs SLICE1 */ + snprintf(tx_chn->name, sizeof(tx_chn->name), + "tx%d-%d", slice, i); + + tx_chn->emac = emac; + tx_chn->id = i; + tx_chn->descs_num = PRUETH_MAX_TX_DESC; + + tx_chn->tx_chn = + k3_udma_glue_request_tx_chn(dev, tx_chn->name, + &tx_cfg); + if (IS_ERR(tx_chn->tx_chn)) { + ret = PTR_ERR(tx_chn->tx_chn); + tx_chn->tx_chn = NULL; + netdev_err(ndev, + "Failed to request tx dma ch: %d\n", ret); + goto fail; + } + + tx_chn->dma_dev = k3_udma_glue_tx_get_dma_device(tx_chn->tx_chn); + tx_chn->desc_pool = + k3_cppi_desc_pool_create_name(tx_chn->dma_dev, + tx_chn->descs_num, + hdesc_size, + tx_chn->name); + if (IS_ERR(tx_chn->desc_pool)) { + ret = PTR_ERR(tx_chn->desc_pool); + tx_chn->desc_pool = NULL; + netdev_err(ndev, "Failed to create tx pool: %d\n", ret); + goto fail; + } + + ret = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); + if (ret < 0) { + netdev_err(ndev, "failed to get tx irq\n"); + goto fail; + } + tx_chn->irq = ret; + + snprintf(tx_chn->name, sizeof(tx_chn->name), "%s-tx%d", + dev_name(dev), tx_chn->id); + } + + return 0; + +fail: + prueth_cleanup_tx_chns(emac); + return ret; +} + +int prueth_init_rx_chns(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + 
char *name, u32 max_rflows, + u32 max_desc_num) +{ + struct k3_udma_glue_rx_channel_cfg rx_cfg; + struct device *dev = emac->prueth->dev; + struct net_device *ndev = emac->ndev; + u32 fdqring_id, hdesc_size; + int i, ret = 0, slice; + + slice = prueth_emac_slice(emac); + if (slice < 0) + return slice; + + /* To differentiate channels for SLICE0 vs SLICE1 */ + snprintf(rx_chn->name, sizeof(rx_chn->name), "%s%d", name, slice); + + hdesc_size = cppi5_hdesc_calc_size(true, PRUETH_NAV_PS_DATA_SIZE, + PRUETH_NAV_SW_DATA_SIZE); + memset(&rx_cfg, 0, sizeof(rx_cfg)); + rx_cfg.swdata_size = PRUETH_NAV_SW_DATA_SIZE; + rx_cfg.flow_id_num = max_rflows; + rx_cfg.flow_id_base = -1; /* udmax will auto select flow id base */ + + /* init all flows */ + rx_chn->dev = dev; + rx_chn->descs_num = max_desc_num; + + rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, rx_chn->name, + &rx_cfg); + if (IS_ERR(rx_chn->rx_chn)) { + ret = PTR_ERR(rx_chn->rx_chn); + rx_chn->rx_chn = NULL; + netdev_err(ndev, "Failed to request rx dma ch: %d\n", ret); + goto fail; + } + + rx_chn->dma_dev = k3_udma_glue_rx_get_dma_device(rx_chn->rx_chn); + rx_chn->desc_pool = k3_cppi_desc_pool_create_name(rx_chn->dma_dev, + rx_chn->descs_num, + hdesc_size, + rx_chn->name); + if (IS_ERR(rx_chn->desc_pool)) { + ret = PTR_ERR(rx_chn->desc_pool); + rx_chn->desc_pool = NULL; + netdev_err(ndev, "Failed to create rx pool: %d\n", ret); + goto fail; + } + + emac->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); + netdev_dbg(ndev, "flow id base = %d\n", emac->rx_flow_id_base); + + fdqring_id = K3_RINGACC_RING_ID_ANY; + for (i = 0; i < rx_cfg.flow_id_num; i++) { + struct k3_ring_cfg rxring_cfg = { + .elm_size = K3_RINGACC_RING_ELSIZE_8, + .mode = K3_RINGACC_RING_MODE_RING, + .flags = 0, + }; + struct k3_ring_cfg fdqring_cfg = { + .elm_size = K3_RINGACC_RING_ELSIZE_8, + .flags = K3_RINGACC_RING_SHARED, + }; + struct k3_udma_glue_rx_flow_cfg rx_flow_cfg = { + .rx_cfg = rxring_cfg, + .rxfdq_cfg = fdqring_cfg, + .ring_rxq_id = K3_RINGACC_RING_ID_ANY, + .src_tag_lo_sel = + K3_UDMA_GLUE_SRC_TAG_LO_USE_REMOTE_SRC_TAG, + }; + + rx_flow_cfg.ring_rxfdq0_id = fdqring_id; + rx_flow_cfg.rx_cfg.size = max_desc_num; + rx_flow_cfg.rxfdq_cfg.size = max_desc_num; + rx_flow_cfg.rxfdq_cfg.mode = emac->prueth->pdata.fdqring_mode; + + ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn, + i, &rx_flow_cfg); + if (ret) { + netdev_err(ndev, "Failed to init rx flow%d %d\n", + i, ret); + goto fail; + } + if (!i) + fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn, + i); + rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); + if (rx_chn->irq[i] <= 0) { + ret = rx_chn->irq[i]; + netdev_err(ndev, "Failed to get rx dma irq"); + goto fail; + } + } + + return 0; + +fail: + prueth_cleanup_rx_chns(emac, rx_chn, max_rflows); + return ret; +} + +int prueth_dma_rx_push(struct prueth_emac *emac, + struct sk_buff *skb, + struct prueth_rx_chn *rx_chn) +{ + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + u32 pkt_len = skb_tailroom(skb); + dma_addr_t desc_dma; + dma_addr_t buf_dma; + void **swdata; + + desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); + if (!desc_rx) { + netdev_err(ndev, "rx push: failed to allocate descriptor\n"); + return -ENOMEM; + } + desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); + + buf_dma = dma_map_single(rx_chn->dma_dev, skb->data, pkt_len, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_chn->dma_dev, buf_dma))) { + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + 
netdev_err(ndev, "rx push: failed to map rx pkt buffer\n"); + return -EINVAL; + } + + cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); + + swdata = cppi5_hdesc_get_swdata(desc_rx); + *swdata = skb; + + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, + desc_rx, desc_dma); +} + +u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns) +{ + u32 iepcount_lo, iepcount_hi, hi_rollover_count; + u64 ns; + + iepcount_lo = lo & GENMASK(19, 0); + iepcount_hi = (hi & GENMASK(11, 0)) << 12 | lo >> 20; + hi_rollover_count = hi >> 11; + + ns = ((u64)hi_rollover_count) << 23 | (iepcount_hi + hi_sw); + ns = ns * cycle_time_ns + iepcount_lo; + + return ns; +} + +void emac_rx_timestamp(struct prueth_emac *emac, + struct sk_buff *skb, u32 *psdata) +{ + struct skb_shared_hwtstamps *ssh; + u64 ns; + + u32 hi_sw = readl(emac->prueth->shram.va + + TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET); + ns = icssg_ts_to_ns(hi_sw, psdata[1], psdata[0], + IEP_DEFAULT_CYCLE_TIME_NS); + + ssh = skb_hwtstamps(skb); + memset(ssh, 0, sizeof(*ssh)); + ssh->hwtstamp = ns_to_ktime(ns); +} + +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_chns; + u32 buf_dma_len, pkt_len, port_id = 0; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct sk_buff *skb, *new_skb; + dma_addr_t desc_dma, buf_dma; + void **swdata; + u32 *psdata; + int ret; + + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); + if (ret) { + if (ret != -ENODATA) + netdev_err(ndev, "rx pop: failed: %d\n", ret); + return ret; + } + + if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown ? 
*/ + return 0; + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + + swdata = cppi5_hdesc_get_swdata(desc_rx); + skb = *swdata; + + psdata = cppi5_hdesc_get_psdata(desc_rx); + /* RX HW timestamp */ + if (emac->rx_ts_enabled) + emac_rx_timestamp(emac, skb, psdata); + + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); + /* firmware adds 4 CRC bytes, strip them */ + pkt_len -= 4; + cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); + + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + + skb->dev = ndev; + new_skb = netdev_alloc_skb_ip_align(ndev, PRUETH_MAX_PKT_SIZE); + /* if allocation fails we drop the packet but push the + * descriptor back to the ring with old skb to prevent a stall + */ + if (!new_skb) { + ndev->stats.rx_dropped++; + new_skb = skb; + } else { + /* send the filled skb up the n/w stack */ + skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + napi_gro_receive(&emac->napi_rx, skb); + ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_packets++; + } + + /* queue another RX DMA */ + ret = prueth_dma_rx_push(emac, new_skb, &emac->rx_chns); + if (WARN_ON(ret < 0)) { + dev_kfree_skb_any(new_skb); + ndev->stats.rx_errors++; + ndev->stats.rx_dropped++; + } + + return ret; +} + +static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct prueth_rx_chn *rx_chn = data; + struct cppi5_host_desc_t *desc_rx; + struct sk_buff *skb; + dma_addr_t buf_dma; + u32 buf_dma_len; + void **swdata; + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_rx); + skb = *swdata; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_FROM_DEVICE); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + + dev_kfree_skb_any(skb); +} + +static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) +{ + int i; + + /* search and get the next free slot */ + for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) { + if (!emac->tx_ts_skb[i]) { + emac->tx_ts_skb[i] = ERR_PTR(-EBUSY); /* reserve slot */ + return i; + } + } + + return -EBUSY; +} + +/** + * emac_ndo_start_xmit - EMAC Transmit function + * @skb: SKB pointer + * @ndev: EMAC network adapter + * + * Called by the system to transmit a packet - we queue the packet in + * EMAC hardware transmit queue + * Doesn't wait for completion we'll check for TX completion in + * emac_tx_complete_packets(). 
+ * + * Return: enum netdev_tx + */ +enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc; + struct prueth_emac *emac = netdev_priv(ndev); + struct netdev_queue *netif_txq; + struct prueth_tx_chn *tx_chn; + dma_addr_t desc_dma, buf_dma; + int i, ret = 0, q_idx; + bool in_tx_ts = 0; + int tx_ts_cookie; + void **swdata; + u32 pkt_len; + u32 *epib; + + pkt_len = skb_headlen(skb); + q_idx = skb_get_queue_mapping(skb); + + tx_chn = &emac->tx_chns[q_idx]; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + + /* Map the linear buffer */ + buf_dma = dma_map_single(tx_chn->dma_dev, skb->data, pkt_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { + netdev_err(ndev, "tx: failed to map skb buffer\n"); + ret = NETDEV_TX_OK; + goto drop_free_skb; + } + + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (!first_desc) { + netdev_dbg(ndev, "tx: failed to allocate descriptor\n"); + dma_unmap_single(tx_chn->dma_dev, buf_dma, pkt_len, DMA_TO_DEVICE); + goto drop_stop_q_busy; + } + + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(first_desc, 0); + epib = first_desc->epib; + epib[0] = 0; + epib[1] = 0; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + emac->tx_ts_enabled) { + tx_ts_cookie = prueth_tx_ts_cookie_get(emac); + if (tx_ts_cookie >= 0) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* Request TX timestamp */ + epib[0] = (u32)tx_ts_cookie; + epib[1] = 0x80000000; /* TX TS request */ + emac->tx_ts_skb[tx_ts_cookie] = skb_get(skb); + in_tx_ts = 1; + } + } + + /* set dst tag to indicate internal qid at the firmware which is at + * bit8..bit15. bit0..bit7 indicates port num for directed + * packets in case of switch mode operation + */ + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8))); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); + swdata = cppi5_hdesc_get_swdata(first_desc); + *swdata = skb; + + /* Handle the case where skb is fragmented in pages */ + cur_desc = first_desc; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 frag_size = skb_frag_size(frag); + + next_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (!next_desc) { + netdev_err(ndev, + "tx: failed to allocate frag. 
descriptor\n"); + goto free_desc_stop_q_busy_cleanup_tx_ts; + } + + buf_dma = skb_frag_dma_map(tx_chn->dma_dev, frag, 0, frag_size, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { + netdev_err(ndev, "tx: Failed to map skb page\n"); + k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); + ret = NETDEV_TX_OK; + goto cleanup_tx_ts; + } + + cppi5_hdesc_reset_hbdesc(next_desc); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); + cppi5_hdesc_attach_buf(next_desc, + buf_dma, frag_size, buf_dma, frag_size); + + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, + next_desc); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &desc_dma); + cppi5_hdesc_link_hbdesc(cur_desc, desc_dma); + + pkt_len += frag_size; + cur_desc = next_desc; + } + WARN_ON_ONCE(pkt_len != skb->len); + + /* report bql before sending packet */ + netdev_tx_sent_queue(netif_txq, pkt_len); + + cppi5_hdesc_set_pktlen(first_desc, pkt_len); + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); + /* cppi5_desc_dump(first_desc, 64); */ + + skb_tx_timestamp(skb); /* SW timestamp if SKBTX_IN_PROGRESS not set */ + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + if (ret) { + netdev_err(ndev, "tx: push failed: %d\n", ret); + goto drop_free_descs; + } + + if (in_tx_ts) + atomic_inc(&emac->tx_ts_pending); + + if (k3_cppi_desc_pool_avail(tx_chn->desc_pool) < MAX_SKB_FRAGS) { + netif_tx_stop_queue(netif_txq); + /* Barrier, so that stop_queue visible to other cpus */ + smp_mb__after_atomic(); + + if (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= + MAX_SKB_FRAGS) + netif_tx_wake_queue(netif_txq); + } + + return NETDEV_TX_OK; + +cleanup_tx_ts: + if (in_tx_ts) { + dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]); + emac->tx_ts_skb[tx_ts_cookie] = NULL; + } + +drop_free_descs: + prueth_xmit_free(tx_chn, first_desc); + +drop_free_skb: + dev_kfree_skb_any(skb); + + /* error */ + ndev->stats.tx_dropped++; + netdev_err(ndev, "tx: error: %d\n", ret); + + return ret; + +free_desc_stop_q_busy_cleanup_tx_ts: + if (in_tx_ts) { + dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]); + emac->tx_ts_skb[tx_ts_cookie] = NULL; + } + prueth_xmit_free(tx_chn, first_desc); + +drop_stop_q_busy: + netif_tx_stop_queue(netif_txq); + return NETDEV_TX_BUSY; +} + +static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct prueth_tx_chn *tx_chn = data; + struct cppi5_host_desc_t *desc_tx; + struct sk_buff *skb; + void **swdata; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + skb = *(swdata); + prueth_xmit_free(tx_chn, desc_tx); + + dev_kfree_skb_any(skb); +} + +irqreturn_t prueth_rx_irq(int irq, void *dev_id) +{ + struct prueth_emac *emac = dev_id; + + disable_irq_nosync(irq); + napi_schedule(&emac->napi_rx); + + return IRQ_HANDLED; +} + +void prueth_emac_stop(struct prueth_emac *emac) +{ + struct prueth *prueth = emac->prueth; + int slice; + + switch (emac->port_id) { + case PRUETH_PORT_MII0: + slice = ICSS_SLICE0; + break; + case PRUETH_PORT_MII1: + slice = ICSS_SLICE1; + break; + default: + netdev_err(emac->ndev, "invalid port\n"); + return; + } + + emac->fw_running = 0; + rproc_shutdown(prueth->txpru[slice]); + rproc_shutdown(prueth->rtu[slice]); + rproc_shutdown(prueth->pru[slice]); +} + +void prueth_cleanup_tx_ts(struct prueth_emac *emac) +{ + int i; + + for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) { + if (emac->tx_ts_skb[i]) { + dev_kfree_skb_any(emac->tx_ts_skb[i]); + emac->tx_ts_skb[i] = NULL; + } 
+ } +} + +int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget) +{ + struct prueth_emac *emac = prueth_napi_to_emac(napi_rx); + int rx_flow = PRUETH_RX_FLOW_DATA; + int flow = PRUETH_MAX_RX_FLOWS; + int num_rx = 0; + int cur_budget; + int ret; + + while (flow--) { + cur_budget = budget - num_rx; + + while (cur_budget--) { + ret = emac_rx_packet(emac, flow); + if (ret) + break; + num_rx++; + } + + if (num_rx >= budget) + break; + } + + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) + enable_irq(emac->rx_chns.irq[rx_flow]); + + return num_rx; +} + +int prueth_prepare_rx_chan(struct prueth_emac *emac, + struct prueth_rx_chn *chn, + int buf_size) +{ + struct sk_buff *skb; + int i, ret; + + for (i = 0; i < chn->descs_num; i++) { + skb = __netdev_alloc_skb_ip_align(NULL, buf_size, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + ret = prueth_dma_rx_push(emac, skb, chn); + if (ret < 0) { + netdev_err(emac->ndev, + "cannot submit skb for rx chan %s ret %d\n", + chn->name, ret); + kfree_skb(skb); + return ret; + } + } + + return 0; +} + +void prueth_reset_tx_chan(struct prueth_emac *emac, int ch_num, + bool free_skb) +{ + int i; + + for (i = 0; i < ch_num; i++) { + if (free_skb) + k3_udma_glue_reset_tx_chn(emac->tx_chns[i].tx_chn, + &emac->tx_chns[i], + prueth_tx_cleanup); + k3_udma_glue_disable_tx_chn(emac->tx_chns[i].tx_chn); + } +} + +void prueth_reset_rx_chan(struct prueth_rx_chn *chn, + int num_flows, bool disable) +{ + int i; + + for (i = 0; i < num_flows; i++) + k3_udma_glue_reset_rx_chn(chn->rx_chn, i, chn, + prueth_rx_cleanup, !!i); + if (disable) + k3_udma_glue_disable_rx_chn(chn->rx_chn); +} + +void emac_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) +{ + ndev->stats.tx_errors++; +} + +static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + emac->tx_ts_enabled = 0; + break; + case HWTSTAMP_TX_ON: + emac->tx_ts_enabled = 1; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + emac->rx_ts_enabled = 0; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + emac->rx_ts_enabled = 1; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +static int emac_get_ts_config(struct net_device *ndev, struct ifreq *ifr) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct hwtstamp_config config; + + config.flags = 0; + config.tx_type = emac->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + config.rx_filter = emac->rx_ts_enabled ? HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
+ -EFAULT : 0; +} + +int emac_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGHWTSTAMP: + return emac_get_ts_config(ndev, ifr); + case SIOCSHWTSTAMP: + return emac_set_ts_config(ndev, ifr); + default: + break; + } + + return phy_do_ioctl(ndev, ifr, cmd); +} + +void emac_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct prueth_emac *emac = netdev_priv(ndev); + + emac_update_hardware_stats(emac); + + stats->rx_packets = emac_get_stat_by_name(emac, "rx_packets"); + stats->rx_bytes = emac_get_stat_by_name(emac, "rx_bytes"); + stats->tx_packets = emac_get_stat_by_name(emac, "tx_packets"); + stats->tx_bytes = emac_get_stat_by_name(emac, "tx_bytes"); + stats->rx_crc_errors = emac_get_stat_by_name(emac, "rx_crc_errors"); + stats->rx_over_errors = emac_get_stat_by_name(emac, "rx_over_errors"); + stats->multicast = emac_get_stat_by_name(emac, "rx_multicast_frames"); + + stats->rx_errors = ndev->stats.rx_errors; + stats->rx_dropped = ndev->stats.rx_dropped; + stats->tx_errors = ndev->stats.tx_errors; + stats->tx_dropped = ndev->stats.tx_dropped; +} + +int emac_ndo_get_phys_port_name(struct net_device *ndev, char *name, + size_t len) +{ + struct prueth_emac *emac = netdev_priv(ndev); + int ret; + + ret = snprintf(name, len, "p%d", emac->port_id); + if (ret >= len) + return -EINVAL; + + return 0; +} + +/* get emac_port corresponding to eth_node name */ +int prueth_node_port(struct device_node *eth_node) +{ + u32 port_id; + int ret; + + ret = of_property_read_u32(eth_node, "reg", &port_id); + if (ret) + return ret; + + if (port_id == 0) + return PRUETH_PORT_MII0; + else if (port_id == 1) + return PRUETH_PORT_MII1; + else + return PRUETH_PORT_INVALID; +} + +/* get MAC instance corresponding to eth_node name */ +int prueth_node_mac(struct device_node *eth_node) +{ + u32 port_id; + int ret; + + ret = of_property_read_u32(eth_node, "reg", &port_id); + if (ret) + return ret; + + if (port_id == 0) + return PRUETH_MAC0; + else if (port_id == 1) + return PRUETH_MAC1; + else + return PRUETH_MAC_INVALID; +} + +void prueth_netdev_exit(struct prueth *prueth, + struct device_node *eth_node) +{ + struct prueth_emac *emac; + enum prueth_mac mac; + + mac = prueth_node_mac(eth_node); + if (mac == PRUETH_MAC_INVALID) + return; + + emac = prueth->emac[mac]; + if (!emac) + return; + + if (of_phy_is_fixed_link(emac->phy_node)) + of_phy_deregister_fixed_link(emac->phy_node); + + netif_napi_del(&emac->napi_rx); + + pruss_release_mem_region(prueth->pruss, &emac->dram); + destroy_workqueue(emac->cmd_wq); + free_netdev(emac->ndev); + prueth->emac[mac] = NULL; +} + +int prueth_get_cores(struct prueth *prueth, int slice) +{ + struct device *dev = prueth->dev; + enum pruss_pru_id pruss_id; + struct device_node *np; + int idx = -1, ret; + + np = dev->of_node; + + switch (slice) { + case ICSS_SLICE0: + idx = 0; + break; + case ICSS_SLICE1: + idx = 3; + break; + default: + return -EINVAL; + } + + prueth->pru[slice] = pru_rproc_get(np, idx, &pruss_id); + if (IS_ERR(prueth->pru[slice])) { + ret = PTR_ERR(prueth->pru[slice]); + prueth->pru[slice] = NULL; + return dev_err_probe(dev, ret, "unable to get PRU%d\n", slice); + } + prueth->pru_id[slice] = pruss_id; + + idx++; + prueth->rtu[slice] = pru_rproc_get(np, idx, NULL); + if (IS_ERR(prueth->rtu[slice])) { + ret = PTR_ERR(prueth->rtu[slice]); + prueth->rtu[slice] = NULL; + return dev_err_probe(dev, ret, "unable to get RTU%d\n", slice); + } + + idx++; + prueth->txpru[slice] = pru_rproc_get(np, idx, NULL); 
+ if (IS_ERR(prueth->txpru[slice])) { + ret = PTR_ERR(prueth->txpru[slice]); + prueth->txpru[slice] = NULL; + return dev_err_probe(dev, ret, "unable to get TX_PRU%d\n", slice); + } + + return 0; +} + +void prueth_put_cores(struct prueth *prueth, int slice) +{ + if (prueth->txpru[slice]) + pru_rproc_put(prueth->txpru[slice]); + + if (prueth->rtu[slice]) + pru_rproc_put(prueth->rtu[slice]); + + if (prueth->pru[slice]) + pru_rproc_put(prueth->pru[slice]); +} + +#ifdef CONFIG_PM_SLEEP +static int prueth_suspend(struct device *dev) +{ + struct prueth *prueth = dev_get_drvdata(dev); + struct net_device *ndev; + int i, ret; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + ndev = prueth->registered_netdevs[i]; + + if (!ndev) + continue; + + if (netif_running(ndev)) { + netif_device_detach(ndev); + ret = ndev->netdev_ops->ndo_stop(ndev); + if (ret < 0) { + netdev_err(ndev, "failed to stop: %d", ret); + return ret; + } + } + } + + return 0; +} + +static int prueth_resume(struct device *dev) +{ + struct prueth *prueth = dev_get_drvdata(dev); + struct net_device *ndev; + int i, ret; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + ndev = prueth->registered_netdevs[i]; + + if (!ndev) + continue; + + if (netif_running(ndev)) { + ret = ndev->netdev_ops->ndo_open(ndev); + if (ret < 0) { + netdev_err(ndev, "failed to start: %d", ret); + return ret; + } + netif_device_attach(ndev); + } + } + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +const struct dev_pm_ops prueth_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(prueth_suspend, prueth_resume) +}; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index cf7b73f8f450..e6eac01f9f99 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -34,568 +34,9 @@ #define PRUETH_MODULE_DESCRIPTION "PRUSS ICSSG Ethernet driver" -/* Netif debug messages possible */ -#define PRUETH_EMAC_DEBUG (NETIF_MSG_DRV | \ - NETIF_MSG_PROBE | \ - NETIF_MSG_LINK | \ - NETIF_MSG_TIMER | \ - NETIF_MSG_IFDOWN | \ - NETIF_MSG_IFUP | \ - NETIF_MSG_RX_ERR | \ - NETIF_MSG_TX_ERR | \ - NETIF_MSG_TX_QUEUED | \ - NETIF_MSG_INTR | \ - NETIF_MSG_TX_DONE | \ - NETIF_MSG_RX_STATUS | \ - NETIF_MSG_PKTDATA | \ - NETIF_MSG_HW | \ - NETIF_MSG_WOL) - -#define prueth_napi_to_emac(napi) container_of(napi, struct prueth_emac, napi_rx) - /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) -#define IEP_DEFAULT_CYCLE_TIME_NS 1000000 /* 1 ms */ - -static void prueth_cleanup_rx_chns(struct prueth_emac *emac, - struct prueth_rx_chn *rx_chn, - int max_rflows) -{ - if (rx_chn->desc_pool) - k3_cppi_desc_pool_destroy(rx_chn->desc_pool); - - if (rx_chn->rx_chn) - k3_udma_glue_release_rx_chn(rx_chn->rx_chn); -} - -static void prueth_cleanup_tx_chns(struct prueth_emac *emac) -{ - int i; - - for (i = 0; i < emac->tx_ch_num; i++) { - struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; - - if (tx_chn->desc_pool) - k3_cppi_desc_pool_destroy(tx_chn->desc_pool); - - if (tx_chn->tx_chn) - k3_udma_glue_release_tx_chn(tx_chn->tx_chn); - - /* Assume prueth_cleanup_tx_chns() is called at the - * end after all channel resources are freed - */ - memset(tx_chn, 0, sizeof(*tx_chn)); - } -} - -static void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num) -{ - int i; - - for (i = 0; i < num; i++) { - struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; - - if (tx_chn->irq) - free_irq(tx_chn->irq, tx_chn); - netif_napi_del(&tx_chn->napi_tx); - } -} - -static void prueth_xmit_free(struct prueth_tx_chn 
*tx_chn, - struct cppi5_host_desc_t *desc) -{ - struct cppi5_host_desc_t *first_desc, *next_desc; - dma_addr_t buf_dma, next_desc_dma; - u32 buf_dma_len; - - first_desc = desc; - next_desc = first_desc; - - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - while (next_desc_dma) { - next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - next_desc_dma); - cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - - k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); - } - - k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); -} - -static int emac_tx_complete_packets(struct prueth_emac *emac, int chn, - int budget) -{ - struct net_device *ndev = emac->ndev; - struct cppi5_host_desc_t *desc_tx; - struct netdev_queue *netif_txq; - struct prueth_tx_chn *tx_chn; - unsigned int total_bytes = 0; - struct sk_buff *skb; - dma_addr_t desc_dma; - int res, num_tx = 0; - void **swdata; - - tx_chn = &emac->tx_chns[chn]; - - while (true) { - res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); - if (res == -ENODATA) - break; - - /* teardown completion */ - if (cppi5_desc_is_tdcm(desc_dma)) { - if (atomic_dec_and_test(&emac->tdown_cnt)) - complete(&emac->tdown_complete); - break; - } - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - - skb = *(swdata); - prueth_xmit_free(tx_chn, desc_tx); - - ndev = skb->dev; - ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; - total_bytes += skb->len; - napi_consume_skb(skb, budget); - num_tx++; - } - - if (!num_tx) - return 0; - - netif_txq = netdev_get_tx_queue(ndev, chn); - netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - - if (netif_tx_queue_stopped(netif_txq)) { - /* If the TX queue was stopped, wake it now - * if we have enough room. 
- */ - __netif_tx_lock(netif_txq, smp_processor_id()); - if (netif_running(ndev) && - (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= - MAX_SKB_FRAGS)) - netif_tx_wake_queue(netif_txq); - __netif_tx_unlock(netif_txq); - } - - return num_tx; -} - -static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) -{ - struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx); - struct prueth_emac *emac = tx_chn->emac; - int num_tx_packets; - - num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget); - - if (num_tx_packets >= budget) - return budget; - - if (napi_complete_done(napi_tx, num_tx_packets)) - enable_irq(tx_chn->irq); - - return num_tx_packets; -} - -static irqreturn_t prueth_tx_irq(int irq, void *dev_id) -{ - struct prueth_tx_chn *tx_chn = dev_id; - - disable_irq_nosync(irq); - napi_schedule(&tx_chn->napi_tx); - - return IRQ_HANDLED; -} - -static int prueth_ndev_add_tx_napi(struct prueth_emac *emac) -{ - struct prueth *prueth = emac->prueth; - int i, ret; - - for (i = 0; i < emac->tx_ch_num; i++) { - struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; - - netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll); - ret = request_irq(tx_chn->irq, prueth_tx_irq, - IRQF_TRIGGER_HIGH, tx_chn->name, - tx_chn); - if (ret) { - netif_napi_del(&tx_chn->napi_tx); - dev_err(prueth->dev, "unable to request TX IRQ %d\n", - tx_chn->irq); - goto fail; - } - } - - return 0; -fail: - prueth_ndev_del_tx_napi(emac, i); - return ret; -} - -static int prueth_init_tx_chns(struct prueth_emac *emac) -{ - static const struct k3_ring_cfg ring_cfg = { - .elm_size = K3_RINGACC_RING_ELSIZE_8, - .mode = K3_RINGACC_RING_MODE_RING, - .flags = 0, - .size = PRUETH_MAX_TX_DESC, - }; - struct k3_udma_glue_tx_channel_cfg tx_cfg; - struct device *dev = emac->prueth->dev; - struct net_device *ndev = emac->ndev; - int ret, slice, i; - u32 hdesc_size; - - slice = prueth_emac_slice(emac); - if (slice < 0) - return slice; - - init_completion(&emac->tdown_complete); - - hdesc_size = cppi5_hdesc_calc_size(true, PRUETH_NAV_PS_DATA_SIZE, - PRUETH_NAV_SW_DATA_SIZE); - memset(&tx_cfg, 0, sizeof(tx_cfg)); - tx_cfg.swdata_size = PRUETH_NAV_SW_DATA_SIZE; - tx_cfg.tx_cfg = ring_cfg; - tx_cfg.txcq_cfg = ring_cfg; - - for (i = 0; i < emac->tx_ch_num; i++) { - struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; - - /* To differentiate channels for SLICE0 vs SLICE1 */ - snprintf(tx_chn->name, sizeof(tx_chn->name), - "tx%d-%d", slice, i); - - tx_chn->emac = emac; - tx_chn->id = i; - tx_chn->descs_num = PRUETH_MAX_TX_DESC; - - tx_chn->tx_chn = - k3_udma_glue_request_tx_chn(dev, tx_chn->name, - &tx_cfg); - if (IS_ERR(tx_chn->tx_chn)) { - ret = PTR_ERR(tx_chn->tx_chn); - tx_chn->tx_chn = NULL; - netdev_err(ndev, - "Failed to request tx dma ch: %d\n", ret); - goto fail; - } - - tx_chn->dma_dev = k3_udma_glue_tx_get_dma_device(tx_chn->tx_chn); - tx_chn->desc_pool = - k3_cppi_desc_pool_create_name(tx_chn->dma_dev, - tx_chn->descs_num, - hdesc_size, - tx_chn->name); - if (IS_ERR(tx_chn->desc_pool)) { - ret = PTR_ERR(tx_chn->desc_pool); - tx_chn->desc_pool = NULL; - netdev_err(ndev, "Failed to create tx pool: %d\n", ret); - goto fail; - } - - ret = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); - if (ret < 0) { - netdev_err(ndev, "failed to get tx irq\n"); - goto fail; - } - tx_chn->irq = ret; - - snprintf(tx_chn->name, sizeof(tx_chn->name), "%s-tx%d", - dev_name(dev), tx_chn->id); - } - - return 0; - -fail: - prueth_cleanup_tx_chns(emac); - return ret; -} - -static int prueth_init_rx_chns(struct prueth_emac *emac, - struct 
prueth_rx_chn *rx_chn, - char *name, u32 max_rflows, - u32 max_desc_num) -{ - struct k3_udma_glue_rx_channel_cfg rx_cfg; - struct device *dev = emac->prueth->dev; - struct net_device *ndev = emac->ndev; - u32 fdqring_id, hdesc_size; - int i, ret = 0, slice; - - slice = prueth_emac_slice(emac); - if (slice < 0) - return slice; - - /* To differentiate channels for SLICE0 vs SLICE1 */ - snprintf(rx_chn->name, sizeof(rx_chn->name), "%s%d", name, slice); - - hdesc_size = cppi5_hdesc_calc_size(true, PRUETH_NAV_PS_DATA_SIZE, - PRUETH_NAV_SW_DATA_SIZE); - memset(&rx_cfg, 0, sizeof(rx_cfg)); - rx_cfg.swdata_size = PRUETH_NAV_SW_DATA_SIZE; - rx_cfg.flow_id_num = max_rflows; - rx_cfg.flow_id_base = -1; /* udmax will auto select flow id base */ - - /* init all flows */ - rx_chn->dev = dev; - rx_chn->descs_num = max_desc_num; - - rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, rx_chn->name, - &rx_cfg); - if (IS_ERR(rx_chn->rx_chn)) { - ret = PTR_ERR(rx_chn->rx_chn); - rx_chn->rx_chn = NULL; - netdev_err(ndev, "Failed to request rx dma ch: %d\n", ret); - goto fail; - } - - rx_chn->dma_dev = k3_udma_glue_rx_get_dma_device(rx_chn->rx_chn); - rx_chn->desc_pool = k3_cppi_desc_pool_create_name(rx_chn->dma_dev, - rx_chn->descs_num, - hdesc_size, - rx_chn->name); - if (IS_ERR(rx_chn->desc_pool)) { - ret = PTR_ERR(rx_chn->desc_pool); - rx_chn->desc_pool = NULL; - netdev_err(ndev, "Failed to create rx pool: %d\n", ret); - goto fail; - } - - emac->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); - netdev_dbg(ndev, "flow id base = %d\n", emac->rx_flow_id_base); - - fdqring_id = K3_RINGACC_RING_ID_ANY; - for (i = 0; i < rx_cfg.flow_id_num; i++) { - struct k3_ring_cfg rxring_cfg = { - .elm_size = K3_RINGACC_RING_ELSIZE_8, - .mode = K3_RINGACC_RING_MODE_RING, - .flags = 0, - }; - struct k3_ring_cfg fdqring_cfg = { - .elm_size = K3_RINGACC_RING_ELSIZE_8, - .flags = K3_RINGACC_RING_SHARED, - }; - struct k3_udma_glue_rx_flow_cfg rx_flow_cfg = { - .rx_cfg = rxring_cfg, - .rxfdq_cfg = fdqring_cfg, - .ring_rxq_id = K3_RINGACC_RING_ID_ANY, - .src_tag_lo_sel = - K3_UDMA_GLUE_SRC_TAG_LO_USE_REMOTE_SRC_TAG, - }; - - rx_flow_cfg.ring_rxfdq0_id = fdqring_id; - rx_flow_cfg.rx_cfg.size = max_desc_num; - rx_flow_cfg.rxfdq_cfg.size = max_desc_num; - rx_flow_cfg.rxfdq_cfg.mode = emac->prueth->pdata.fdqring_mode; - - ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn, - i, &rx_flow_cfg); - if (ret) { - netdev_err(ndev, "Failed to init rx flow%d %d\n", - i, ret); - goto fail; - } - if (!i) - fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn, - i); - rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); - if (rx_chn->irq[i] <= 0) { - ret = rx_chn->irq[i]; - netdev_err(ndev, "Failed to get rx dma irq"); - goto fail; - } - } - - return 0; - -fail: - prueth_cleanup_rx_chns(emac, rx_chn, max_rflows); - return ret; -} - -static int prueth_dma_rx_push(struct prueth_emac *emac, - struct sk_buff *skb, - struct prueth_rx_chn *rx_chn) -{ - struct net_device *ndev = emac->ndev; - struct cppi5_host_desc_t *desc_rx; - u32 pkt_len = skb_tailroom(skb); - dma_addr_t desc_dma; - dma_addr_t buf_dma; - void **swdata; - - desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); - if (!desc_rx) { - netdev_err(ndev, "rx push: failed to allocate descriptor\n"); - return -ENOMEM; - } - desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); - - buf_dma = dma_map_single(rx_chn->dma_dev, skb->data, pkt_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(rx_chn->dma_dev, buf_dma))) { - 
k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - netdev_err(ndev, "rx push: failed to map rx pkt buffer\n"); - return -EINVAL; - } - - cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, - PRUETH_NAV_PS_DATA_SIZE); - k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); - cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); - - swdata = cppi5_hdesc_get_swdata(desc_rx); - *swdata = skb; - - return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, - desc_rx, desc_dma); -} - -static u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns) -{ - u32 iepcount_lo, iepcount_hi, hi_rollover_count; - u64 ns; - - iepcount_lo = lo & GENMASK(19, 0); - iepcount_hi = (hi & GENMASK(11, 0)) << 12 | lo >> 20; - hi_rollover_count = hi >> 11; - - ns = ((u64)hi_rollover_count) << 23 | (iepcount_hi + hi_sw); - ns = ns * cycle_time_ns + iepcount_lo; - - return ns; -} - -static void emac_rx_timestamp(struct prueth_emac *emac, - struct sk_buff *skb, u32 *psdata) -{ - struct skb_shared_hwtstamps *ssh; - u64 ns; - - u32 hi_sw = readl(emac->prueth->shram.va + - TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET); - ns = icssg_ts_to_ns(hi_sw, psdata[1], psdata[0], - IEP_DEFAULT_CYCLE_TIME_NS); - - ssh = skb_hwtstamps(skb); - memset(ssh, 0, sizeof(*ssh)); - ssh->hwtstamp = ns_to_ktime(ns); -} - -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id) -{ - struct prueth_rx_chn *rx_chn = &emac->rx_chns; - u32 buf_dma_len, pkt_len, port_id = 0; - struct net_device *ndev = emac->ndev; - struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb, *new_skb; - dma_addr_t desc_dma, buf_dma; - void **swdata; - u32 *psdata; - int ret; - - ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); - if (ret) { - if (ret != -ENODATA) - netdev_err(ndev, "rx pop: failed: %d\n", ret); - return ret; - } - - if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown ? 
*/ - return 0; - - desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); - - swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - - psdata = cppi5_hdesc_get_psdata(desc_rx); - /* RX HW timestamp */ - if (emac->rx_ts_enabled) - emac_rx_timestamp(emac, skb, psdata); - - cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - pkt_len = cppi5_hdesc_get_pktlen(desc_rx); - /* firmware adds 4 CRC bytes, strip them */ - pkt_len -= 4; - cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); - - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - - skb->dev = ndev; - new_skb = netdev_alloc_skb_ip_align(ndev, PRUETH_MAX_PKT_SIZE); - /* if allocation fails we drop the packet but push the - * descriptor back to the ring with old skb to prevent a stall - */ - if (!new_skb) { - ndev->stats.rx_dropped++; - new_skb = skb; - } else { - /* send the filled skb up the n/w stack */ - skb_put(skb, pkt_len); - skb->protocol = eth_type_trans(skb, ndev); - napi_gro_receive(&emac->napi_rx, skb); - ndev->stats.rx_bytes += pkt_len; - ndev->stats.rx_packets++; - } - - /* queue another RX DMA */ - ret = prueth_dma_rx_push(emac, new_skb, &emac->rx_chns); - if (WARN_ON(ret < 0)) { - dev_kfree_skb_any(new_skb); - ndev->stats.rx_errors++; - ndev->stats.rx_dropped++; - } - - return ret; -} - -static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct prueth_rx_chn *rx_chn = data; - struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb; - dma_addr_t buf_dma; - u32 buf_dma_len; - void **swdata; - - desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - - dev_kfree_skb_any(skb); -} - static int emac_get_tx_ts(struct prueth_emac *emac, struct emac_tx_ts_response *rsp) { @@ -661,208 +102,6 @@ static void tx_ts_work(struct prueth_emac *emac) } } -static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) -{ - int i; - - /* search and get the next free slot */ - for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) { - if (!emac->tx_ts_skb[i]) { - emac->tx_ts_skb[i] = ERR_PTR(-EBUSY); /* reserve slot */ - return i; - } - } - - return -EBUSY; -} - -/** - * emac_ndo_start_xmit - EMAC Transmit function - * @skb: SKB pointer - * @ndev: EMAC network adapter - * - * Called by the system to transmit a packet - we queue the packet in - * EMAC hardware transmit queue - * Doesn't wait for completion we'll check for TX completion in - * emac_tx_complete_packets(). 
- * - * Return: enum netdev_tx - */ -static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) -{ - struct cppi5_host_desc_t *first_desc, *next_desc, *cur_desc; - struct prueth_emac *emac = netdev_priv(ndev); - struct netdev_queue *netif_txq; - struct prueth_tx_chn *tx_chn; - dma_addr_t desc_dma, buf_dma; - int i, ret = 0, q_idx; - bool in_tx_ts = 0; - int tx_ts_cookie; - void **swdata; - u32 pkt_len; - u32 *epib; - - pkt_len = skb_headlen(skb); - q_idx = skb_get_queue_mapping(skb); - - tx_chn = &emac->tx_chns[q_idx]; - netif_txq = netdev_get_tx_queue(ndev, q_idx); - - /* Map the linear buffer */ - buf_dma = dma_map_single(tx_chn->dma_dev, skb->data, pkt_len, DMA_TO_DEVICE); - if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { - netdev_err(ndev, "tx: failed to map skb buffer\n"); - ret = NETDEV_TX_OK; - goto drop_free_skb; - } - - first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); - if (!first_desc) { - netdev_dbg(ndev, "tx: failed to allocate descriptor\n"); - dma_unmap_single(tx_chn->dma_dev, buf_dma, pkt_len, DMA_TO_DEVICE); - goto drop_stop_q_busy; - } - - cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, - PRUETH_NAV_PS_DATA_SIZE); - cppi5_hdesc_set_pkttype(first_desc, 0); - epib = first_desc->epib; - epib[0] = 0; - epib[1] = 0; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - emac->tx_ts_enabled) { - tx_ts_cookie = prueth_tx_ts_cookie_get(emac); - if (tx_ts_cookie >= 0) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - /* Request TX timestamp */ - epib[0] = (u32)tx_ts_cookie; - epib[1] = 0x80000000; /* TX TS request */ - emac->tx_ts_skb[tx_ts_cookie] = skb_get(skb); - in_tx_ts = 1; - } - } - - /* set dst tag to indicate internal qid at the firmware which is at - * bit8..bit15. bit0..bit7 indicates port num for directed - * packets in case of switch mode operation - */ - cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8))); - k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); - cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); - swdata = cppi5_hdesc_get_swdata(first_desc); - *swdata = skb; - - /* Handle the case where skb is fragmented in pages */ - cur_desc = first_desc; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - u32 frag_size = skb_frag_size(frag); - - next_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); - if (!next_desc) { - netdev_err(ndev, - "tx: failed to allocate frag. 
descriptor\n"); - goto free_desc_stop_q_busy_cleanup_tx_ts; - } - - buf_dma = skb_frag_dma_map(tx_chn->dma_dev, frag, 0, frag_size, - DMA_TO_DEVICE); - if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { - netdev_err(ndev, "tx: Failed to map skb page\n"); - k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); - ret = NETDEV_TX_OK; - goto cleanup_tx_ts; - } - - cppi5_hdesc_reset_hbdesc(next_desc); - k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); - cppi5_hdesc_attach_buf(next_desc, - buf_dma, frag_size, buf_dma, frag_size); - - desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, - next_desc); - k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &desc_dma); - cppi5_hdesc_link_hbdesc(cur_desc, desc_dma); - - pkt_len += frag_size; - cur_desc = next_desc; - } - WARN_ON_ONCE(pkt_len != skb->len); - - /* report bql before sending packet */ - netdev_tx_sent_queue(netif_txq, pkt_len); - - cppi5_hdesc_set_pktlen(first_desc, pkt_len); - desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); - /* cppi5_desc_dump(first_desc, 64); */ - - skb_tx_timestamp(skb); /* SW timestamp if SKBTX_IN_PROGRESS not set */ - ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); - if (ret) { - netdev_err(ndev, "tx: push failed: %d\n", ret); - goto drop_free_descs; - } - - if (in_tx_ts) - atomic_inc(&emac->tx_ts_pending); - - if (k3_cppi_desc_pool_avail(tx_chn->desc_pool) < MAX_SKB_FRAGS) { - netif_tx_stop_queue(netif_txq); - /* Barrier, so that stop_queue visible to other cpus */ - smp_mb__after_atomic(); - - if (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= - MAX_SKB_FRAGS) - netif_tx_wake_queue(netif_txq); - } - - return NETDEV_TX_OK; - -cleanup_tx_ts: - if (in_tx_ts) { - dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]); - emac->tx_ts_skb[tx_ts_cookie] = NULL; - } - -drop_free_descs: - prueth_xmit_free(tx_chn, first_desc); - -drop_free_skb: - dev_kfree_skb_any(skb); - - /* error */ - ndev->stats.tx_dropped++; - netdev_err(ndev, "tx: error: %d\n", ret); - - return ret; - -free_desc_stop_q_busy_cleanup_tx_ts: - if (in_tx_ts) { - dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]); - emac->tx_ts_skb[tx_ts_cookie] = NULL; - } - prueth_xmit_free(tx_chn, first_desc); - -drop_stop_q_busy: - netif_tx_stop_queue(netif_txq); - return NETDEV_TX_BUSY; -} - -static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct prueth_tx_chn *tx_chn = data; - struct cppi5_host_desc_t *desc_tx; - struct sk_buff *skb; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - prueth_xmit_free(tx_chn, desc_tx); - - dev_kfree_skb_any(skb); -} - static irqreturn_t prueth_tx_ts_irq(int irq, void *dev_id) { struct prueth_emac *emac = dev_id; @@ -873,22 +112,6 @@ static irqreturn_t prueth_tx_ts_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t prueth_rx_irq(int irq, void *dev_id) -{ - struct prueth_emac *emac = dev_id; - - disable_irq_nosync(irq); - napi_schedule(&emac->napi_rx); - - return IRQ_HANDLED; -} - -struct icssg_firmwares { - char *pru; - char *rtu; - char *txpru; -}; - static struct icssg_firmwares icssg_emac_firmwares[] = { { .pru = "ti-pruss/am65x-sr2-pru0-prueth-fw.elf", @@ -953,41 +176,6 @@ halt_pru: return ret; } -static void prueth_emac_stop(struct prueth_emac *emac) -{ - struct prueth *prueth = emac->prueth; - int slice; - - switch (emac->port_id) { - case PRUETH_PORT_MII0: - slice = ICSS_SLICE0; - break; - case PRUETH_PORT_MII1: - slice = ICSS_SLICE1; - break; 
- default: - netdev_err(emac->ndev, "invalid port\n"); - return; - } - - emac->fw_running = 0; - rproc_shutdown(prueth->txpru[slice]); - rproc_shutdown(prueth->rtu[slice]); - rproc_shutdown(prueth->pru[slice]); -} - -static void prueth_cleanup_tx_ts(struct prueth_emac *emac) -{ - int i; - - for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) { - if (emac->tx_ts_skb[i]) { - dev_kfree_skb_any(emac->tx_ts_skb[i]); - emac->tx_ts_skb[i] = NULL; - } - } -} - /* called back by PHY layer if there is change in link state of hw port*/ static void emac_adjust_link(struct net_device *ndev) { @@ -1055,86 +243,6 @@ static void emac_adjust_link(struct net_device *ndev) } } -static int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget) -{ - struct prueth_emac *emac = prueth_napi_to_emac(napi_rx); - int rx_flow = PRUETH_RX_FLOW_DATA; - int flow = PRUETH_MAX_RX_FLOWS; - int num_rx = 0; - int cur_budget; - int ret; - - while (flow--) { - cur_budget = budget - num_rx; - - while (cur_budget--) { - ret = emac_rx_packet(emac, flow); - if (ret) - break; - num_rx++; - } - - if (num_rx >= budget) - break; - } - - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) - enable_irq(emac->rx_chns.irq[rx_flow]); - - return num_rx; -} - -static int prueth_prepare_rx_chan(struct prueth_emac *emac, - struct prueth_rx_chn *chn, - int buf_size) -{ - struct sk_buff *skb; - int i, ret; - - for (i = 0; i < chn->descs_num; i++) { - skb = __netdev_alloc_skb_ip_align(NULL, buf_size, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - ret = prueth_dma_rx_push(emac, skb, chn); - if (ret < 0) { - netdev_err(emac->ndev, - "cannot submit skb for rx chan %s ret %d\n", - chn->name, ret); - kfree_skb(skb); - return ret; - } - } - - return 0; -} - -static void prueth_reset_tx_chan(struct prueth_emac *emac, int ch_num, - bool free_skb) -{ - int i; - - for (i = 0; i < ch_num; i++) { - if (free_skb) - k3_udma_glue_reset_tx_chn(emac->tx_chns[i].tx_chn, - &emac->tx_chns[i], - prueth_tx_cleanup); - k3_udma_glue_disable_tx_chn(emac->tx_chns[i].tx_chn); - } -} - -static void prueth_reset_rx_chan(struct prueth_rx_chn *chn, - int num_flows, bool disable) -{ - int i; - - for (i = 0; i < num_flows; i++) - k3_udma_glue_reset_rx_chn(chn->rx_chn, i, chn, - prueth_rx_cleanup, !!i); - if (disable) - k3_udma_glue_disable_rx_chn(chn->rx_chn); -} - static int emac_phy_connect(struct prueth_emac *emac) { struct prueth *prueth = emac->prueth; @@ -1508,11 +616,6 @@ static int emac_ndo_stop(struct net_device *ndev) return 0; } -static void emac_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) -{ - ndev->stats.tx_errors++; -} - static void emac_ndo_set_rx_mode_work(struct work_struct *work) { struct prueth_emac *emac = container_of(work, struct prueth_emac, rx_mode_work); @@ -1558,116 +661,6 @@ static void emac_ndo_set_rx_mode(struct net_device *ndev) queue_work(emac->cmd_wq, &emac->rx_mode_work); } -static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr) -{ - struct prueth_emac *emac = netdev_priv(ndev); - struct hwtstamp_config config; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - emac->tx_ts_enabled = 0; - break; - case HWTSTAMP_TX_ON: - emac->tx_ts_enabled = 1; - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - emac->rx_ts_enabled = 0; - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - 
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: - emac->rx_ts_enabled = 1; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - return -ERANGE; - } - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -static int emac_get_ts_config(struct net_device *ndev, struct ifreq *ifr) -{ - struct prueth_emac *emac = netdev_priv(ndev); - struct hwtstamp_config config; - - config.flags = 0; - config.tx_type = emac->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - config.rx_filter = emac->rx_ts_enabled ? HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -static int emac_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) -{ - switch (cmd) { - case SIOCGHWTSTAMP: - return emac_get_ts_config(ndev, ifr); - case SIOCSHWTSTAMP: - return emac_set_ts_config(ndev, ifr); - default: - break; - } - - return phy_do_ioctl(ndev, ifr, cmd); -} - -static void emac_ndo_get_stats64(struct net_device *ndev, - struct rtnl_link_stats64 *stats) -{ - struct prueth_emac *emac = netdev_priv(ndev); - - emac_update_hardware_stats(emac); - - stats->rx_packets = emac_get_stat_by_name(emac, "rx_packets"); - stats->rx_bytes = emac_get_stat_by_name(emac, "rx_bytes"); - stats->tx_packets = emac_get_stat_by_name(emac, "tx_packets"); - stats->tx_bytes = emac_get_stat_by_name(emac, "tx_bytes"); - stats->rx_crc_errors = emac_get_stat_by_name(emac, "rx_crc_errors"); - stats->rx_over_errors = emac_get_stat_by_name(emac, "rx_over_errors"); - stats->multicast = emac_get_stat_by_name(emac, "rx_multicast_frames"); - - stats->rx_errors = ndev->stats.rx_errors; - stats->rx_dropped = ndev->stats.rx_dropped; - stats->tx_errors = ndev->stats.tx_errors; - stats->tx_dropped = ndev->stats.tx_dropped; -} - -static int emac_ndo_get_phys_port_name(struct net_device *ndev, char *name, - size_t len) -{ - struct prueth_emac *emac = netdev_priv(ndev); - int ret; - - ret = snprintf(name, len, "p%d", emac->port_id); - if (ret >= len) - return -EINVAL; - - return 0; -} - static const struct net_device_ops emac_netdev_ops = { .ndo_open = emac_ndo_open, .ndo_stop = emac_ndo_stop, @@ -1681,42 +674,6 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_get_phys_port_name = emac_ndo_get_phys_port_name, }; -/* get emac_port corresponding to eth_node name */ -static int prueth_node_port(struct device_node *eth_node) -{ - u32 port_id; - int ret; - - ret = of_property_read_u32(eth_node, "reg", &port_id); - if (ret) - return ret; - - if (port_id == 0) - return PRUETH_PORT_MII0; - else if (port_id == 1) - return PRUETH_PORT_MII1; - else - return PRUETH_PORT_INVALID; -} - -/* get MAC instance corresponding to eth_node name */ -static int prueth_node_mac(struct device_node *eth_node) -{ - u32 port_id; - int ret; - - ret = of_property_read_u32(eth_node, "reg", &port_id); - if (ret) - return ret; - - if (port_id == 0) - return PRUETH_MAC0; - else if (port_id == 1) - return PRUETH_MAC1; - else - return PRUETH_MAC_INVALID; -} - static int prueth_netdev_init(struct prueth *prueth, struct device_node *eth_node) { @@ -1860,90 +817,6 @@ free_ndev: return ret; 
} -static void prueth_netdev_exit(struct prueth *prueth, - struct device_node *eth_node) -{ - struct prueth_emac *emac; - enum prueth_mac mac; - - mac = prueth_node_mac(eth_node); - if (mac == PRUETH_MAC_INVALID) - return; - - emac = prueth->emac[mac]; - if (!emac) - return; - - if (of_phy_is_fixed_link(emac->phy_node)) - of_phy_deregister_fixed_link(emac->phy_node); - - netif_napi_del(&emac->napi_rx); - - pruss_release_mem_region(prueth->pruss, &emac->dram); - destroy_workqueue(emac->cmd_wq); - free_netdev(emac->ndev); - prueth->emac[mac] = NULL; -} - -static int prueth_get_cores(struct prueth *prueth, int slice) -{ - struct device *dev = prueth->dev; - enum pruss_pru_id pruss_id; - struct device_node *np; - int idx = -1, ret; - - np = dev->of_node; - - switch (slice) { - case ICSS_SLICE0: - idx = 0; - break; - case ICSS_SLICE1: - idx = 3; - break; - default: - return -EINVAL; - } - - prueth->pru[slice] = pru_rproc_get(np, idx, &pruss_id); - if (IS_ERR(prueth->pru[slice])) { - ret = PTR_ERR(prueth->pru[slice]); - prueth->pru[slice] = NULL; - return dev_err_probe(dev, ret, "unable to get PRU%d\n", slice); - } - prueth->pru_id[slice] = pruss_id; - - idx++; - prueth->rtu[slice] = pru_rproc_get(np, idx, NULL); - if (IS_ERR(prueth->rtu[slice])) { - ret = PTR_ERR(prueth->rtu[slice]); - prueth->rtu[slice] = NULL; - return dev_err_probe(dev, ret, "unable to get RTU%d\n", slice); - } - - idx++; - prueth->txpru[slice] = pru_rproc_get(np, idx, NULL); - if (IS_ERR(prueth->txpru[slice])) { - ret = PTR_ERR(prueth->txpru[slice]); - prueth->txpru[slice] = NULL; - return dev_err_probe(dev, ret, "unable to get TX_PRU%d\n", slice); - } - - return 0; -} - -static void prueth_put_cores(struct prueth *prueth, int slice) -{ - if (prueth->txpru[slice]) - pru_rproc_put(prueth->txpru[slice]); - - if (prueth->rtu[slice]) - pru_rproc_put(prueth->rtu[slice]); - - if (prueth->pru[slice]) - pru_rproc_put(prueth->pru[slice]); -} - static int prueth_probe(struct platform_device *pdev) { struct device_node *eth_node, *eth_ports_node; @@ -2273,62 +1146,6 @@ static void prueth_remove(struct platform_device *pdev) prueth_put_cores(prueth, ICSS_SLICE0); } -#ifdef CONFIG_PM_SLEEP -static int prueth_suspend(struct device *dev) -{ - struct prueth *prueth = dev_get_drvdata(dev); - struct net_device *ndev; - int i, ret; - - for (i = 0; i < PRUETH_NUM_MACS; i++) { - ndev = prueth->registered_netdevs[i]; - - if (!ndev) - continue; - - if (netif_running(ndev)) { - netif_device_detach(ndev); - ret = emac_ndo_stop(ndev); - if (ret < 0) { - netdev_err(ndev, "failed to stop: %d", ret); - return ret; - } - } - } - - return 0; -} - -static int prueth_resume(struct device *dev) -{ - struct prueth *prueth = dev_get_drvdata(dev); - struct net_device *ndev; - int i, ret; - - for (i = 0; i < PRUETH_NUM_MACS; i++) { - ndev = prueth->registered_netdevs[i]; - - if (!ndev) - continue; - - if (netif_running(ndev)) { - ret = emac_ndo_open(ndev); - if (ret < 0) { - netdev_err(ndev, "failed to start: %d", ret); - return ret; - } - netif_device_attach(ndev); - } - } - - return 0; -} -#endif /* CONFIG_PM_SLEEP */ - -static const struct dev_pm_ops prueth_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(prueth_suspend, prueth_resume) -}; - static const struct prueth_pdata am654_icssg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, .quirk_10m_link_issue = 1, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 8b6d6b497010..5d792e9bade0 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ 
b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -55,6 +55,8 @@ #define ICSSG_NUM_STANDARD_STATS 31 #define ICSSG_NUM_ETHTOOL_STATS (ICSSG_NUM_STATS - ICSSG_NUM_STANDARD_STATS) +#define IEP_DEFAULT_CYCLE_TIME_NS 1000000 /* 1 ms */ + /* Firmware status codes */ #define ICSS_HS_FW_READY 0x55555555 #define ICSS_HS_FW_DEAD 0xDEAD0000 /* lower 16 bits contain error code */ @@ -188,6 +190,12 @@ struct prueth_pdata { u32 quirk_10m_link_issue:1; }; +struct icssg_firmwares { + char *pru; + char *rtu; + char *txpru; +}; + /** * struct prueth - PRUeth structure * @dev: device @@ -257,6 +265,7 @@ static inline int prueth_emac_slice(struct prueth_emac *emac) } extern const struct ethtool_ops icssg_ethtool_ops; +extern const struct dev_pm_ops prueth_dev_pm_ops; /* Classifier helpers */ void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac); @@ -285,4 +294,54 @@ u32 icssg_queue_level(struct prueth *prueth, int queue); void emac_stats_work_handler(struct work_struct *work); void emac_update_hardware_stats(struct prueth_emac *emac); int emac_get_stat_by_name(struct prueth_emac *emac, char *stat_name); + +/* Common functions */ +void prueth_cleanup_rx_chns(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + int max_rflows); +void prueth_cleanup_tx_chns(struct prueth_emac *emac); +void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num); +void prueth_xmit_free(struct prueth_tx_chn *tx_chn, + struct cppi5_host_desc_t *desc); +int emac_tx_complete_packets(struct prueth_emac *emac, int chn, + int budget); +int prueth_ndev_add_tx_napi(struct prueth_emac *emac); +int prueth_init_tx_chns(struct prueth_emac *emac); +int prueth_init_rx_chns(struct prueth_emac *emac, + struct prueth_rx_chn *rx_chn, + char *name, u32 max_rflows, + u32 max_desc_num); +int prueth_dma_rx_push(struct prueth_emac *emac, + struct sk_buff *skb, + struct prueth_rx_chn *rx_chn); +void emac_rx_timestamp(struct prueth_emac *emac, + struct sk_buff *skb, u32 *psdata); +enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev); +irqreturn_t prueth_rx_irq(int irq, void *dev_id); +void prueth_emac_stop(struct prueth_emac *emac); +void prueth_cleanup_tx_ts(struct prueth_emac *emac); +int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget); +int prueth_prepare_rx_chan(struct prueth_emac *emac, + struct prueth_rx_chn *chn, + int buf_size); +void prueth_reset_tx_chan(struct prueth_emac *emac, int ch_num, + bool free_skb); +void prueth_reset_rx_chan(struct prueth_rx_chn *chn, + int num_flows, bool disable); +void emac_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); +int emac_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd); +void emac_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats); +int emac_ndo_get_phys_port_name(struct net_device *ndev, char *name, + size_t len); +int prueth_node_port(struct device_node *eth_node); +int prueth_node_mac(struct device_node *eth_node); +void prueth_netdev_exit(struct prueth *prueth, + struct device_node *eth_node); +int prueth_get_cores(struct prueth *prueth, int slice); +void prueth_put_cores(struct prueth *prueth, int slice); + +/* Revision specific helper */ +u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns); + #endif /* __NET_TI_ICSSG_PRUETH_H */ -- cgit v1.2.3 From 6d6a5751cd8e4d78b2d9093c0ba3fbfed551e8cb Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:14 +0100 Subject: net: ti: icssg-prueth: Add SR1.0-specific configuration bits Define the firmware 
configuration structure and commands needed to communicate with SR1.0 firmware, as well as SR1.0 buffer information where it differs from SR2.0. Based on the work of Roger Quadros, Murali Karicheri and Grygorii Strashko in TI's 5.10 SDK [1]. [1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_config.h | 56 ++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h index 43eb0922172a..cf2ea4bd22a2 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.h +++ b/drivers/net/ethernet/ti/icssg/icssg_config.h @@ -109,6 +109,62 @@ enum icssg_port_state_cmd { #define ICSSG_FLAG_MASK 0xff00ffff +/* SR1.0-specific bits */ +#define PRUETH_MAX_RX_FLOWS_SR1 4 /* excluding default flow */ +#define PRUETH_RX_FLOW_DATA_SR1 3 /* highest priority flow */ +#define PRUETH_MAX_RX_MGM_DESC_SR1 8 +#define PRUETH_MAX_RX_MGM_FLOWS_SR1 2 /* excluding default flow */ +#define PRUETH_RX_MGM_FLOW_RESPONSE_SR1 0 +#define PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1 1 + +#define PRUETH_NUM_BUF_POOLS_SR1 16 +#define PRUETH_EMAC_BUF_POOL_START_SR1 8 +#define PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1 128 +#define PRUETH_EMAC_BUF_SIZE_SR1 1536 +#define PRUETH_EMAC_NUM_BUF_SR1 4 +#define PRUETH_EMAC_BUF_POOL_SIZE_SR1 (PRUETH_EMAC_NUM_BUF_SR1 * \ + PRUETH_EMAC_BUF_SIZE_SR1) +#define MSMC_RAM_SIZE_SR1 (SZ_64K + SZ_32K + SZ_2K) /* 0x1880 x 8 x 2 */ + +struct icssg_sr1_config { + __le32 status; /* Firmware status */ + __le32 addr_lo; /* MSMC Buffer pool base address low. */ + __le32 addr_hi; /* MSMC Buffer pool base address high. Must be 0 */ + __le32 tx_buf_sz[16]; /* Array of buffer pool sizes */ + __le32 num_tx_threads; /* Number of active egress threads, 1 to 4 */ + __le32 tx_rate_lim_en; /* Bitmask: Egress rate limit en per thread */ + __le32 rx_flow_id; /* RX flow id for first rx ring */ + __le32 rx_mgr_flow_id; /* RX flow id for the first management ring */ + __le32 flags; /* TBD */ + __le32 n_burst; /* for debug */ + __le32 rtu_status; /* RTU status */ + __le32 info; /* reserved */ + __le32 reserve; + __le32 rand_seed; /* Used for the random number generation at fw */ +} __packed; + +/* SR1.0 shutdown command to stop processing at firmware. + * Command format: 0x8101ss00, where + * - ss: sequence number. Currently not used by driver. + */ +#define ICSSG_SHUTDOWN_CMD_SR1 0x81010000 + +/* SR1.0 pstate speed/duplex command to set speed and duplex settings + * in firmware. + * Command format: 0x8102ssPN, where + * - ss: sequence number. Currently not used by driver. + * - P: port number (for switch mode). + * - N: Speed/Duplex state: + * 0x0 - 10Mbps/Half duplex; + * 0x8 - 10Mbps/Full duplex; + * 0x2 - 100Mbps/Half duplex; + * 0xa - 100Mbps/Full duplex; + * 0xc - 1Gbps/Full duplex; + * NOTE: The above are the same value as bits [3..1](slice 0) + * or bits [7..5](slice 1) of RGMII CFG register. 
+ */ +#define ICSSG_PSTATE_SPEED_DUPLEX_CMD_SR1 0x81020000 + struct icssg_setclock_desc { u8 request; u8 restore; -- cgit v1.2.3 From 8623dea207a7ec01ccb69bf14fd172da9be5a1dc Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:15 +0100 Subject: net: ti: icssg-prueth: Add SR1.0-specific description bits Add a field to distinguish between SR1.0 and SR2.0 in the driver as well as the necessary structures to program SR1.0. Based on the work of Roger Quadros in TI's 5.10 SDK [1]. [1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 5d792e9bade0..4632d83d4732 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -129,6 +129,7 @@ struct prueth_rx_chn { /* data for each emac port */ struct prueth_emac { + bool is_sr1; bool fw_running; struct prueth *prueth; struct net_device *ndev; @@ -157,6 +158,10 @@ struct prueth_emac { int rx_flow_id_base; int tx_ch_num; + /* SR1.0 Management channel */ + struct prueth_rx_chn rx_mgm_chn; + int rx_mgm_flow_id_base; + spinlock_t lock; /* serialize access */ /* TX HW Timestamping */ @@ -167,7 +172,7 @@ struct prueth_emac { u8 cmd_seq; /* shutdown related */ - u32 cmd_data[4]; + __le32 cmd_data[4]; struct completion cmd_complete; /* Mutex to serialize access to firmware command interface */ struct mutex cmd_lock; @@ -251,6 +256,13 @@ struct emac_tx_ts_response { u32 hi_ts; }; +struct emac_tx_ts_response_sr1 { + __le32 lo_ts; + __le32 hi_ts; + __le32 reserved; + __le32 cookie; +}; + /* get PRUSS SLICE number from prueth_emac */ static inline int prueth_emac_slice(struct prueth_emac *emac) { -- cgit v1.2.3 From 95c2e689331ee53b850ea4c996831ce64a91aea2 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:16 +0100 Subject: net: ti: icssg-prueth: Adjust IPG configuration for SR1.0 Correctly adjust the IPG based on the Silicon Revision. Based on the work of Roger Quadros, Vignesh Raghavendra and Grygorii Strashko in TI's 5.10 SDK [1]. 
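For reference, a standalone sketch (not part of the patch) of how the TX IPG value ends up being selected per link speed and silicon revision; the MII_RT_TX_IPG_* values and the early return for SR1.0 at 10M come straight from the hunk below, the helper itself is only illustrative:

	/* Illustrative only: pick the TX IPG (in core_clk cycles) for a given
	 * link speed and silicon revision. Returns 0 when the register should
	 * be left untouched (SR1.0 at 10M, where firmware hardcodes the IPG,
	 * and unsupported speeds).
	 */
	static unsigned int icssg_pick_tx_ipg(int speed, bool is_sr1)
	{
		switch (speed) {
		case 1000:	/* SPEED_1000 */
			return is_sr1 ? 0x1a : 0xb;	/* MII_RT_TX_IPG_1G(_SR1) */
		case 100:	/* SPEED_100 */
			return is_sr1 ? 0x166 : 0x17;	/* MII_RT_TX_IPG_100M(_SR1) */
		case 10:	/* SPEED_10: same IPG as 100M on SR2.0 */
			return is_sr1 ? 0 : 0x17;	/* SR1.0 firmware hardcodes it */
		default:
			return 0;	/* unsupported link speed */
		}
	}
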
[1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_config.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index 99de8a40ed60..15f2235bf90f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -20,6 +20,8 @@ /* IPG is in core_clk cycles */ #define MII_RT_TX_IPG_100M 0x17 #define MII_RT_TX_IPG_1G 0xb +#define MII_RT_TX_IPG_100M_SR1 0x166 +#define MII_RT_TX_IPG_1G_SR1 0x1a #define ICSSG_QUEUES_MAX 64 #define ICSSG_QUEUE_OFFSET 0xd00 @@ -202,23 +204,29 @@ void icssg_config_ipg(struct prueth_emac *emac) { struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); + u32 ipg; switch (emac->speed) { case SPEED_1000: - icssg_mii_update_ipg(prueth->mii_rt, slice, MII_RT_TX_IPG_1G); + ipg = emac->is_sr1 ? MII_RT_TX_IPG_1G_SR1 : MII_RT_TX_IPG_1G; break; case SPEED_100: - icssg_mii_update_ipg(prueth->mii_rt, slice, MII_RT_TX_IPG_100M); + ipg = emac->is_sr1 ? MII_RT_TX_IPG_100M_SR1 : MII_RT_TX_IPG_100M; break; case SPEED_10: + /* Firmware hardcodes IPG for SR1.0 */ + if (emac->is_sr1) + return; /* IPG for 10M is same as 100M */ - icssg_mii_update_ipg(prueth->mii_rt, slice, MII_RT_TX_IPG_100M); + ipg = MII_RT_TX_IPG_100M; break; default: /* Other links speeds not supported */ netdev_err(emac->ndev, "Unsupported link speed\n"); return; } + + icssg_mii_update_ipg(prueth->mii_rt, slice, ipg); } static void emac_r30_cmd_init(struct prueth_emac *emac) -- cgit v1.2.3 From 604e603d73ec75365d9a2325991c0234ca420ac5 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:17 +0100 Subject: net: ti: icssg-prueth: Adjust the number of TX channels for SR1.0 As SR1.0 uses the current higher priority channel to send commands to the firmware, take this into account when setting/getting the number of channels to/from the user. Based on the work of Roger Quadros in TI's 5.10 SDK [1]. 
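For clarity, a minimal sketch (not part of the patch) of the channel accounting this change introduces: the last, highest-priority TX channel carries firmware command messages and is hidden from ethtool, so user-visible counts are offset by one on SR1.0. With the SR1.0 default of tx_ch_num == 2, the user therefore sees a single data queue. The field names mirror the hunks below; the helpers are hypothetical:

	/* Hypothetical helpers mirroring emac_set_channels()/emac_get_channels() */
	static int user_to_driver_tx_chn(int user_tx_count, bool is_sr1)
	{
		return is_sr1 ? user_tx_count + 1 : user_tx_count;	/* reserve cmd channel */
	}

	static int driver_to_user_tx_chn(int tx_ch_num, bool is_sr1)
	{
		return is_sr1 ? tx_ch_num - 1 : tx_ch_num;		/* hide cmd channel */
	}
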
[1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_ethtool.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index 9a7dd7efcf69..ca20325d4d3e 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -142,6 +142,9 @@ static int emac_set_channels(struct net_device *ndev, emac->tx_ch_num = ch->tx_count; + if (emac->is_sr1) + emac->tx_ch_num++; + return 0; } @@ -152,8 +155,17 @@ static void emac_get_channels(struct net_device *ndev, ch->max_rx = 1; ch->max_tx = PRUETH_MAX_TX_QUEUES; + + /* Disable multiple TX channels due to timeouts + * when using more than one queue */ + if (emac->is_sr1) + ch->max_tx = 1; + ch->rx_count = 1; ch->tx_count = emac->tx_ch_num; + + if (emac->is_sr1) + ch->tx_count--; } static const struct ethtool_rmon_hist_range emac_rmon_ranges[] = { -- cgit v1.2.3 From 0a74a9de79c142e6c6fdedc22e55933034efd24a Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:18 +0100 Subject: net: ti: icssg-prueth: Add functions to configure SR1.0 packet classifier Add the functions to configure the SR1.0 packet classifier. Based on the work of Roger Quadros in TI's 5.10 SDK [1]. [1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_classifier.c | 113 +++++++++++++++++++++-- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 2 +- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 6 +- 3 files changed, 110 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c index 6df53ab17fbc..79ba47bb3602 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c +++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c @@ -274,6 +274,16 @@ static void rx_class_set_or(struct regmap *miig_rt, int slice, int n, regmap_write(miig_rt, offset, data); } +static u32 rx_class_get_or(struct regmap *miig_rt, int slice, int n) +{ + u32 offset, val; + + offset = RX_CLASS_N_REG(slice, n, RX_CLASS_OR_EN); + regmap_read(miig_rt, offset, &val); + + return val; +} + void icssg_class_set_host_mac_addr(struct regmap *miig_rt, const u8 *mac) { regmap_write(miig_rt, MAC_INTERFACE_0, (u32)(mac[0] | mac[1] << 8 | @@ -288,6 +298,26 @@ void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac) regmap_write(miig_rt, offs[slice].mac1, (u32)(mac[4] | mac[5] << 8)); } +static void icssg_class_ft1_add_mcast(struct regmap *miig_rt, int slice, + int slot, const u8 *addr, const u8 *mask) +{ + u32 val; + int i; + + WARN(slot >= FT1_NUM_SLOTS, "invalid slot: %d\n", slot); + + rx_class_ft1_set_da(miig_rt, slice, slot, addr); + rx_class_ft1_set_da_mask(miig_rt, slice, slot, mask); + rx_class_ft1_cfg_set_type(miig_rt, slice, slot, FT1_CFG_TYPE_EQ); + + /* Enable the FT1 slot in OR enable for all classifiers */ + for (i = 0; i < ICSSG_NUM_CLASSIFIERS_IN_USE; i++) { + val = rx_class_get_or(miig_rt, slice, i); + val |= 
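As an illustration (not from the patch): the classifier code below reserves FT1 slots 0 and 1 for the 01-80-C2-00-00-XX and 01-00-5E-00-00-XX ranges using a destination-address mask whose last octet is 0xff, which, judging by the reserved-range comment in the hunk, marks that octet as "don't care"; the remaining FT1 slots hold exact multicast addresses from the device list, and once the slots are exhausted the driver falls back to allmulti. A hypothetical helper mirroring the skip test for addresses already covered by the reserved slots (the eth_*_addr_base symbols are the ones the hunk below relies on):

	#include <linux/etherdevice.h>	/* eth_reserved_addr_base, eth_ipv4_mcast_addr_base */
	#include <linux/string.h>

	/* Hypothetical: true if the first five octets match one of the two
	 * reserved ranges, so the address must not consume another FT1 slot.
	 */
	static bool covered_by_reserved_slot(const u8 *addr)
	{
		return !memcmp(eth_reserved_addr_base, addr, 5) ||
		       !memcmp(eth_ipv4_mcast_addr_base, addr, 5);
	}
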
RX_CLASS_FT_FT1_MATCH(slot); + rx_class_set_or(miig_rt, slice, i, val); + } +} + /* disable all RX traffic */ void icssg_class_disable(struct regmap *miig_rt, int slice) { @@ -331,30 +361,95 @@ void icssg_class_disable(struct regmap *miig_rt, int slice) regmap_write(miig_rt, offs[slice].rx_class_cfg2, 0); } -void icssg_class_default(struct regmap *miig_rt, int slice, bool allmulti) +void icssg_class_default(struct regmap *miig_rt, int slice, bool allmulti, + bool is_sr1) { + int num_classifiers = is_sr1 ? ICSSG_NUM_CLASSIFIERS_IN_USE : 1; u32 data; + int n; /* defaults */ icssg_class_disable(miig_rt, slice); /* Setup Classifier */ - /* match on Broadcast or MAC_PRU address */ - data = RX_CLASS_FT_BC | RX_CLASS_FT_DA_P; + for (n = 0; n < num_classifiers; n++) { + /* match on Broadcast or MAC_PRU address */ + data = RX_CLASS_FT_BC | RX_CLASS_FT_DA_P; - /* multicast */ - if (allmulti) - data |= RX_CLASS_FT_MC; + /* multicast */ + if (allmulti) + data |= RX_CLASS_FT_MC; - rx_class_set_or(miig_rt, slice, 0, data); + rx_class_set_or(miig_rt, slice, n, data); - /* set CFG1 for OR_OR_AND for classifier */ - rx_class_sel_set_type(miig_rt, slice, 0, RX_CLASS_SEL_TYPE_OR_OR_AND); + /* set CFG1 for OR_OR_AND for classifier */ + rx_class_sel_set_type(miig_rt, slice, n, + RX_CLASS_SEL_TYPE_OR_OR_AND); + } /* clear CFG2 */ regmap_write(miig_rt, offs[slice].rx_class_cfg2, 0); } +void icssg_class_promiscuous_sr1(struct regmap *miig_rt, int slice) +{ + u32 data, offset; + int n; + + /* defaults */ + icssg_class_disable(miig_rt, slice); + + /* Setup Classifier */ + for (n = 0; n < ICSSG_NUM_CLASSIFIERS_IN_USE; n++) { + /* set RAW_MASK to bypass filters */ + offset = RX_CLASS_GATES_N_REG(slice, n); + regmap_read(miig_rt, offset, &data); + data |= RX_CLASS_GATES_RAW_MASK; + regmap_write(miig_rt, offset, data); + } +} + +void icssg_class_add_mcast_sr1(struct regmap *miig_rt, int slice, + struct net_device *ndev) +{ + u8 mask_addr[6] = { 0, 0, 0, 0, 0, 0xff }; + struct netdev_hw_addr *ha; + int slot = 2; + + rx_class_ft1_set_start_len(miig_rt, slice, 0, 6); + /* reserve first 2 slots for + * 1) 01-80-C2-00-00-XX Known Service Ethernet Multicast addresses + * 2) 01-00-5e-00-00-XX Local Network Control Block + * (224.0.0.0 - 224.0.0.255 (224.0.0/24)) + */ + icssg_class_ft1_add_mcast(miig_rt, slice, 0, + eth_reserved_addr_base, mask_addr); + icssg_class_ft1_add_mcast(miig_rt, slice, 1, + eth_ipv4_mcast_addr_base, mask_addr); + mask_addr[5] = 0; + netdev_for_each_mc_addr(ha, ndev) { + /* skip addresses matching reserved slots */ + if (!memcmp(eth_reserved_addr_base, ha->addr, 5) || + !memcmp(eth_ipv4_mcast_addr_base, ha->addr, 5)) { + netdev_dbg(ndev, "mcast skip %pM\n", ha->addr); + continue; + } + + if (slot >= FT1_NUM_SLOTS) { + netdev_dbg(ndev, + "can't add more than %d MC addresses, enabling allmulti\n", + FT1_NUM_SLOTS); + icssg_class_default(miig_rt, slice, 1, true); + break; + } + + netdev_dbg(ndev, "mcast add %pM\n", ha->addr); + icssg_class_ft1_add_mcast(miig_rt, slice, slot, + ha->addr, mask_addr); + slot++; + } +} + /* required for SAV check */ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index e6eac01f9f99..7d9db9683e18 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -437,7 +437,7 @@ static int emac_ndo_open(struct net_device *ndev) icssg_class_set_mac_addr(prueth->miig_rt, slice, emac->mac_addr); 
icssg_ft1_set_mac_addr(prueth->miig_rt, slice, emac->mac_addr); - icssg_class_default(prueth->miig_rt, slice, 0); + icssg_class_default(prueth->miig_rt, slice, 0, false); /* Notify the stack of the actual queue counts. */ ret = netif_set_real_num_tx_queues(ndev, num_data_chn); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 4632d83d4732..5441f2c26430 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -283,7 +283,11 @@ extern const struct dev_pm_ops prueth_dev_pm_ops; void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac); void icssg_class_set_host_mac_addr(struct regmap *miig_rt, const u8 *mac); void icssg_class_disable(struct regmap *miig_rt, int slice); -void icssg_class_default(struct regmap *miig_rt, int slice, bool allmulti); +void icssg_class_default(struct regmap *miig_rt, int slice, bool allmulti, + bool is_sr1); +void icssg_class_promiscuous_sr1(struct regmap *miig_rt, int slice); +void icssg_class_add_mcast_sr1(struct regmap *miig_rt, int slice, + struct net_device *ndev); void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr); /* config helpers */ -- cgit v1.2.3 From ce95cb4c8d26b6917debf0eb1cd7c05230c0e7aa Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:19 +0100 Subject: net: ti: icssg-prueth: Modify common functions for SR1.0 Some parts of the logic differ only slightly between Silicon Revisions. In these cases add the bits that differ to a common function that executes those bits conditionally based on the Silicon Revision. Based on the work of Roger Quadros, Vignesh Raghavendra and Grygorii Strashko in TI's 5.10 SDK [1]. [1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 45 +++++++++++++++++++++------- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 4 +-- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 99f27ecc9352..1d62c05b5f7c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -152,6 +152,12 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); + /* was this command's TX complete? 
*/ + if (emac->is_sr1 && *(swdata) == emac->cmd_data) { + prueth_xmit_free(tx_chn, desc_tx); + continue; + } + skb = *(swdata); prueth_xmit_free(tx_chn, desc_tx); @@ -327,6 +333,7 @@ int prueth_init_rx_chns(struct prueth_emac *emac, struct net_device *ndev = emac->ndev; u32 fdqring_id, hdesc_size; int i, ret = 0, slice; + int flow_id_base; slice = prueth_emac_slice(emac); if (slice < 0) @@ -367,8 +374,14 @@ int prueth_init_rx_chns(struct prueth_emac *emac, goto fail; } - emac->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); - netdev_dbg(ndev, "flow id base = %d\n", emac->rx_flow_id_base); + flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); + if (emac->is_sr1 && !strcmp(name, "rxmgm")) { + emac->rx_mgm_flow_id_base = flow_id_base; + netdev_dbg(ndev, "mgm flow id base = %d\n", flow_id_base); + } else { + emac->rx_flow_id_base = flow_id_base; + netdev_dbg(ndev, "flow id base = %d\n", flow_id_base); + } fdqring_id = K3_RINGACC_RING_ID_ANY; for (i = 0; i < rx_cfg.flow_id_num; i++) { @@ -477,10 +490,14 @@ void emac_rx_timestamp(struct prueth_emac *emac, struct skb_shared_hwtstamps *ssh; u64 ns; - u32 hi_sw = readl(emac->prueth->shram.va + - TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET); - ns = icssg_ts_to_ns(hi_sw, psdata[1], psdata[0], - IEP_DEFAULT_CYCLE_TIME_NS); + if (emac->is_sr1) { + ns = (u64)psdata[1] << 32 | psdata[0]; + } else { + u32 hi_sw = readl(emac->prueth->shram.va + + TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET); + ns = icssg_ts_to_ns(hi_sw, psdata[1], psdata[0], + IEP_DEFAULT_CYCLE_TIME_NS); + } ssh = skb_hwtstamps(skb); memset(ssh, 0, sizeof(*ssh)); @@ -809,7 +826,8 @@ void prueth_emac_stop(struct prueth_emac *emac) } emac->fw_running = 0; - rproc_shutdown(prueth->txpru[slice]); + if (!emac->is_sr1) + rproc_shutdown(prueth->txpru[slice]); rproc_shutdown(prueth->rtu[slice]); rproc_shutdown(prueth->pru[slice]); } @@ -829,8 +847,10 @@ void prueth_cleanup_tx_ts(struct prueth_emac *emac) int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget) { struct prueth_emac *emac = prueth_napi_to_emac(napi_rx); - int rx_flow = PRUETH_RX_FLOW_DATA; - int flow = PRUETH_MAX_RX_FLOWS; + int rx_flow = emac->is_sr1 ? + PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA; + int flow = emac->is_sr1 ? + PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS; int num_rx = 0; int cur_budget; int ret; @@ -1082,7 +1102,7 @@ void prueth_netdev_exit(struct prueth *prueth, prueth->emac[mac] = NULL; } -int prueth_get_cores(struct prueth *prueth, int slice) +int prueth_get_cores(struct prueth *prueth, int slice, bool is_sr1) { struct device *dev = prueth->dev; enum pruss_pru_id pruss_id; @@ -1096,7 +1116,7 @@ int prueth_get_cores(struct prueth *prueth, int slice) idx = 0; break; case ICSS_SLICE1: - idx = 3; + idx = is_sr1 ? 
2 : 3; break; default: return -EINVAL; @@ -1118,6 +1138,9 @@ int prueth_get_cores(struct prueth *prueth, int slice) return dev_err_probe(dev, ret, "unable to get RTU%d\n", slice); } + if (is_sr1) + return 0; + idx++; prueth->txpru[slice] = pru_rproc_get(np, idx, NULL); if (IS_ERR(prueth->txpru[slice])) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 7d9db9683e18..186b0365c2e5 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -907,13 +907,13 @@ static int prueth_probe(struct platform_device *pdev) } if (eth0_node) { - ret = prueth_get_cores(prueth, ICSS_SLICE0); + ret = prueth_get_cores(prueth, ICSS_SLICE0, false); if (ret) goto put_cores; } if (eth1_node) { - ret = prueth_get_cores(prueth, ICSS_SLICE1); + ret = prueth_get_cores(prueth, ICSS_SLICE1, false); if (ret) goto put_cores; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 5441f2c26430..82e38ef5635b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -354,7 +354,7 @@ int prueth_node_port(struct device_node *eth_node); int prueth_node_mac(struct device_node *eth_node); void prueth_netdev_exit(struct prueth *prueth, struct device_node *eth_node); -int prueth_get_cores(struct prueth *prueth, int slice); +int prueth_get_cores(struct prueth *prueth, int slice, bool is_sr1); void prueth_put_cores(struct prueth *prueth, int slice); /* Revision specific helper */ -- cgit v1.2.3 From e654b85a693e3cad58cf248c0770c02c346c8824 Mon Sep 17 00:00:00 2001 From: Diogo Ivo Date: Wed, 3 Apr 2024 11:48:20 +0100 Subject: net: ti: icssg-prueth: Add ICSSG Ethernet driver for AM65x SR1.0 platforms Add the PRUeth driver for the ICSSG subsystem found in AM65x SR1.0 devices. The main differences that set SR1.0 and SR2.0 apart are the missing TXPRU core in SR1.0, two extra DMA channels for management purposes and different firmware that needs to be configured accordingly. Based on the work of Roger Quadros, Vignesh Raghavendra and Grygorii Strashko in TI's 5.10 SDK [1]. [1]: https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/?h=ti-linux-5.10.y Co-developed-by: Jan Kiszka Signed-off-by: Jan Kiszka Signed-off-by: Diogo Ivo Reviewed-by: MD Danish Anwar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/Kconfig | 15 + drivers/net/ethernet/ti/Makefile | 8 + drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c | 1181 ++++++++++++++++++++++ 3 files changed, 1204 insertions(+) create mode 100644 drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index cfe9bd6b1d62..1729eb0e0b41 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -198,6 +198,21 @@ config TI_ICSSG_PRUETH to support the Ethernet operation. Currently, it supports Ethernet with 1G and 100M link speed. +config TI_ICSSG_PRUETH_SR1 + tristate "TI Gigabit PRU SR1.0 Ethernet driver" + select PHYLIB + select TI_ICSS_IEP + select TI_K3_CPPI_DESC_POOL + depends on PRU_REMOTEPROC + depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER + help + Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem. + This subsystem is available on the AM65 SR1.0 platform. + + This driver requires firmware binaries which will run on the PRUs + to support the Ethernet operation. 
Currently, it supports Ethernet + with 1G, 100M and 10M link speed. + config TI_ICSS_IEP tristate "TI PRU ICSS IEP driver" depends on PTP_1588_CLOCK_OPTIONAL diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 4876f20aa495..6e086b4c0384 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -40,4 +40,12 @@ icssg-prueth-y := icssg/icssg_prueth.o \ icssg/icssg_mii_cfg.o \ icssg/icssg_stats.o \ icssg/icssg_ethtool.o +obj-$(CONFIG_TI_ICSSG_PRUETH_SR1) += icssg-prueth-sr1.o +icssg-prueth-sr1-y := icssg/icssg_prueth_sr1.o \ + icssg/icssg_common.o \ + icssg/icssg_classifier.o \ + icssg/icssg_config.o \ + icssg/icssg_mii_cfg.o \ + icssg/icssg_stats.o \ + icssg/icssg_ethtool.o obj-$(CONFIG_TI_ICSS_IEP) += icssg/icss_iep.o diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c new file mode 100644 index 000000000000..7b3304bbd7fc --- /dev/null +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -0,0 +1,1181 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Texas Instruments ICSSG SR1.0 Ethernet Driver + * + * Copyright (C) 2018-2022 Texas Instruments Incorporated - https://www.ti.com/ + * Copyright (c) Siemens AG, 2024 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "icssg_prueth.h" +#include "icssg_mii_rt.h" +#include "../k3-cppi-desc-pool.h" + +#define PRUETH_MODULE_DESCRIPTION "PRUSS ICSSG SR1.0 Ethernet driver" + +/* SR1: Set buffer sizes for the pools. There are 8 internal queues + * implemented in firmware, but only 4 tx channels/threads in the Egress + * direction to firmware. Need a high priority queue for management + * messages since they shouldn't be blocked even during high traffic + * situation. So use Q0-Q2 as data queues and Q3 as management queue + * in the max case. However for ease of configuration, use the max + * data queue + 1 for management message if we are not using max + * case. + * + * Allocate 4 MTU buffers per data queue. Firmware requires + * pool sizes to be set for internal queues. Set the upper 5 queue + * pool size to min size of 128 bytes since there are only 3 tx + * data channels and management queue requires only minimum buffer. + * i.e lower queues are used by driver and highest priority queue + * from that is used for management message. 
+ */ + +static int emac_egress_buf_pool_size[] = { + PRUETH_EMAC_BUF_POOL_SIZE_SR1, PRUETH_EMAC_BUF_POOL_SIZE_SR1, + PRUETH_EMAC_BUF_POOL_SIZE_SR1, PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1, + PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1, PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1, + PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1, PRUETH_EMAC_BUF_POOL_MIN_SIZE_SR1 +}; + +static void icssg_config_sr1(struct prueth *prueth, struct prueth_emac *emac, + int slice) +{ + struct icssg_sr1_config config; + void __iomem *va; + int i, index; + + memset(&config, 0, sizeof(config)); + config.addr_lo = cpu_to_le32(lower_32_bits(prueth->msmcram.pa)); + config.addr_hi = cpu_to_le32(upper_32_bits(prueth->msmcram.pa)); + config.rx_flow_id = cpu_to_le32(emac->rx_flow_id_base); /* flow id for host port */ + config.rx_mgr_flow_id = cpu_to_le32(emac->rx_mgm_flow_id_base); /* for mgm ch */ + config.rand_seed = cpu_to_le32(get_random_u32()); + + for (i = PRUETH_EMAC_BUF_POOL_START_SR1; i < PRUETH_NUM_BUF_POOLS_SR1; i++) { + index = i - PRUETH_EMAC_BUF_POOL_START_SR1; + config.tx_buf_sz[i] = cpu_to_le32(emac_egress_buf_pool_size[index]); + } + + va = prueth->shram.va + slice * ICSSG_CONFIG_OFFSET_SLICE1; + memcpy_toio(va, &config, sizeof(config)); + + emac->speed = SPEED_1000; + emac->duplex = DUPLEX_FULL; +} + +static int emac_send_command_sr1(struct prueth_emac *emac, u32 cmd) +{ + struct cppi5_host_desc_t *first_desc; + u32 pkt_len = sizeof(emac->cmd_data); + __le32 *data = emac->cmd_data; + dma_addr_t desc_dma, buf_dma; + struct prueth_tx_chn *tx_chn; + void **swdata; + int ret = 0; + u32 *epib; + + netdev_dbg(emac->ndev, "Sending cmd %x\n", cmd); + + /* only one command at a time allowed to firmware */ + mutex_lock(&emac->cmd_lock); + data[0] = cpu_to_le32(cmd); + + /* highest priority channel for management messages */ + tx_chn = &emac->tx_chns[emac->tx_ch_num - 1]; + + /* Map the linear buffer */ + buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) { + netdev_err(emac->ndev, "cmd %x: failed to map cmd buffer\n", cmd); + ret = -EINVAL; + goto err_unlock; + } + + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (!first_desc) { + netdev_err(emac->ndev, "cmd %x: failed to allocate descriptor\n", cmd); + dma_unmap_single(tx_chn->dma_dev, buf_dma, pkt_len, DMA_TO_DEVICE); + ret = -ENOMEM; + goto err_unlock; + } + + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + PRUETH_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(first_desc, PRUETH_PKT_TYPE_CMD); + epib = first_desc->epib; + epib[0] = 0; + epib[1] = 0; + + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); + swdata = cppi5_hdesc_get_swdata(first_desc); + *swdata = data; + + cppi5_hdesc_set_pktlen(first_desc, pkt_len); + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); + + /* send command */ + reinit_completion(&emac->cmd_complete); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + if (ret) { + netdev_err(emac->ndev, "cmd %x: push failed: %d\n", cmd, ret); + goto free_desc; + } + ret = wait_for_completion_timeout(&emac->cmd_complete, msecs_to_jiffies(100)); + if (!ret) + netdev_err(emac->ndev, "cmd %x: completion timeout\n", cmd); + + mutex_unlock(&emac->cmd_lock); + + return ret; +free_desc: + prueth_xmit_free(tx_chn, first_desc); +err_unlock: + mutex_unlock(&emac->cmd_lock); + + return ret; +} + +static void icssg_config_set_speed_sr1(struct prueth_emac *emac) +{ + u32 cmd = ICSSG_PSTATE_SPEED_DUPLEX_CMD_SR1, val; + struct prueth *prueth 
= emac->prueth; + int slice = prueth_emac_slice(emac); + + val = icssg_rgmii_get_speed(prueth->miig_rt, slice); + /* firmware expects speed settings in bit 2-1 */ + val <<= 1; + cmd |= val; + + val = icssg_rgmii_get_fullduplex(prueth->miig_rt, slice); + /* firmware expects full duplex settings in bit 3 */ + val <<= 3; + cmd |= val; + + emac_send_command_sr1(emac, cmd); +} + +/* called back by PHY layer if there is change in link state of hw port*/ +static void emac_adjust_link_sr1(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct phy_device *phydev = ndev->phydev; + struct prueth *prueth = emac->prueth; + bool new_state = false; + unsigned long flags; + + if (phydev->link) { + /* check the mode of operation - full/half duplex */ + if (phydev->duplex != emac->duplex) { + new_state = true; + emac->duplex = phydev->duplex; + } + if (phydev->speed != emac->speed) { + new_state = true; + emac->speed = phydev->speed; + } + if (!emac->link) { + new_state = true; + emac->link = 1; + } + } else if (emac->link) { + new_state = true; + emac->link = 0; + + /* f/w should support 100 & 1000 */ + emac->speed = SPEED_1000; + + /* half duplex may not be supported by f/w */ + emac->duplex = DUPLEX_FULL; + } + + if (new_state) { + phy_print_status(phydev); + + /* update RGMII and MII configuration based on PHY negotiated + * values + */ + if (emac->link) { + /* Set the RGMII cfg for gig en and full duplex */ + icssg_update_rgmii_cfg(prueth->miig_rt, emac); + + /* update the Tx IPG based on 100M/1G speed */ + spin_lock_irqsave(&emac->lock, flags); + icssg_config_ipg(emac); + spin_unlock_irqrestore(&emac->lock, flags); + icssg_config_set_speed_sr1(emac); + } + } + + if (emac->link) { + /* reactivate the transmit queue */ + netif_tx_wake_all_queues(ndev); + } else { + netif_tx_stop_all_queues(ndev); + prueth_cleanup_tx_ts(emac); + } +} + +static int emac_phy_connect(struct prueth_emac *emac) +{ + struct prueth *prueth = emac->prueth; + struct net_device *ndev = emac->ndev; + /* connect PHY */ + ndev->phydev = of_phy_connect(emac->ndev, emac->phy_node, + &emac_adjust_link_sr1, 0, + emac->phy_if); + if (!ndev->phydev) { + dev_err(prueth->dev, "couldn't connect to phy %s\n", + emac->phy_node->full_name); + return -ENODEV; + } + + if (!emac->half_duplex) { + dev_dbg(prueth->dev, "half duplex mode is not supported\n"); + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + } + + /* Remove 100Mbits half-duplex due to RGMII misreporting connection + * as full duplex */ + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + + /* remove unsupported modes */ + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Pause_BIT); + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT); + + if (emac->phy_if == PHY_INTERFACE_MODE_MII) + phy_set_max_speed(ndev->phydev, SPEED_100); + + return 0; +} + +/* get one packet from requested flow_id + * + * Returns skb pointer if packet found else NULL + * Caller must free the returned skb. 
+ */ +static struct sk_buff *prueth_process_rx_mgm(struct prueth_emac *emac, + u32 flow_id) +{ + struct prueth_rx_chn *rx_chn = &emac->rx_mgm_chn; + struct net_device *ndev = emac->ndev; + struct cppi5_host_desc_t *desc_rx; + struct sk_buff *skb, *new_skb; + dma_addr_t desc_dma, buf_dma; + u32 buf_dma_len, pkt_len; + void **swdata; + int ret; + + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); + if (ret) { + if (ret != -ENODATA) + netdev_err(ndev, "rx mgm pop: failed: %d\n", ret); + return NULL; + } + + if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown */ + return NULL; + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + + /* Fix FW bug about incorrect PSDATA size */ + if (cppi5_hdesc_get_psdata_size(desc_rx) != PRUETH_NAV_PS_DATA_SIZE) { + cppi5_hdesc_update_psdata_size(desc_rx, + PRUETH_NAV_PS_DATA_SIZE); + } + + swdata = cppi5_hdesc_get_swdata(desc_rx); + skb = *swdata; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); + + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + + new_skb = netdev_alloc_skb_ip_align(ndev, PRUETH_MAX_PKT_SIZE); + /* if allocation fails we drop the packet but push the + * descriptor back to the ring with old skb to prevent a stall + */ + if (!new_skb) { + netdev_err(ndev, + "skb alloc failed, dropped mgm pkt from flow %d\n", + flow_id); + new_skb = skb; + skb = NULL; /* return NULL */ + } else { + /* return the filled skb */ + skb_put(skb, pkt_len); + } + + /* queue another DMA */ + ret = prueth_dma_rx_push(emac, new_skb, &emac->rx_mgm_chn); + if (WARN_ON(ret < 0)) + dev_kfree_skb_any(new_skb); + + return skb; +} + +static void prueth_tx_ts_sr1(struct prueth_emac *emac, + struct emac_tx_ts_response_sr1 *tsr) +{ + struct skb_shared_hwtstamps ssh; + u32 hi_ts, lo_ts, cookie; + struct sk_buff *skb; + u64 ns; + + hi_ts = le32_to_cpu(tsr->hi_ts); + lo_ts = le32_to_cpu(tsr->lo_ts); + + ns = (u64)hi_ts << 32 | lo_ts; + + cookie = le32_to_cpu(tsr->cookie); + if (cookie >= PRUETH_MAX_TX_TS_REQUESTS) { + netdev_dbg(emac->ndev, "Invalid TX TS cookie 0x%x\n", + cookie); + return; + } + + skb = emac->tx_ts_skb[cookie]; + emac->tx_ts_skb[cookie] = NULL; /* free slot */ + + memset(&ssh, 0, sizeof(ssh)); + ssh.hwtstamp = ns_to_ktime(ns); + + skb_tstamp_tx(skb, &ssh); + dev_consume_skb_any(skb); +} + +static irqreturn_t prueth_rx_mgm_ts_thread_sr1(int irq, void *dev_id) +{ + struct prueth_emac *emac = dev_id; + struct sk_buff *skb; + + skb = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1); + if (!skb) + return IRQ_NONE; + + prueth_tx_ts_sr1(emac, (void *)skb->data); + dev_kfree_skb_any(skb); + + return IRQ_HANDLED; +} + +static irqreturn_t prueth_rx_mgm_rsp_thread(int irq, void *dev_id) +{ + struct prueth_emac *emac = dev_id; + struct sk_buff *skb; + u32 rsp; + + skb = prueth_process_rx_mgm(emac, PRUETH_RX_MGM_FLOW_RESPONSE_SR1); + if (!skb) + return IRQ_NONE; + + /* Process command response */ + rsp = le32_to_cpu(*(__le32 *)skb->data) & 0xffff0000; + if (rsp == ICSSG_SHUTDOWN_CMD_SR1) { + netdev_dbg(emac->ndev, "f/w Shutdown cmd resp %x\n", rsp); + complete(&emac->cmd_complete); + } else if (rsp == ICSSG_PSTATE_SPEED_DUPLEX_CMD_SR1) { + netdev_dbg(emac->ndev, "f/w Speed/Duplex cmd rsp %x\n", rsp); + complete(&emac->cmd_complete); + } + + dev_kfree_skb_any(skb); + + return IRQ_HANDLED; +} + +static struct icssg_firmwares icssg_sr1_emac_firmwares[] = { + { + .pru = "ti-pruss/am65x-pru0-prueth-fw.elf", 
+ .rtu = "ti-pruss/am65x-rtu0-prueth-fw.elf", + }, + { + .pru = "ti-pruss/am65x-pru1-prueth-fw.elf", + .rtu = "ti-pruss/am65x-rtu1-prueth-fw.elf", + } +}; + +static int prueth_emac_start(struct prueth *prueth, struct prueth_emac *emac) +{ + struct icssg_firmwares *firmwares; + struct device *dev = prueth->dev; + int slice, ret; + + firmwares = icssg_sr1_emac_firmwares; + + slice = prueth_emac_slice(emac); + if (slice < 0) { + netdev_err(emac->ndev, "invalid port\n"); + return -EINVAL; + } + + icssg_config_sr1(prueth, emac, slice); + + ret = rproc_set_firmware(prueth->pru[slice], firmwares[slice].pru); + ret = rproc_boot(prueth->pru[slice]); + if (ret) { + dev_err(dev, "failed to boot PRU%d: %d\n", slice, ret); + return -EINVAL; + } + + ret = rproc_set_firmware(prueth->rtu[slice], firmwares[slice].rtu); + ret = rproc_boot(prueth->rtu[slice]); + if (ret) { + dev_err(dev, "failed to boot RTU%d: %d\n", slice, ret); + goto halt_pru; + } + + emac->fw_running = 1; + return 0; + +halt_pru: + rproc_shutdown(prueth->pru[slice]); + + return ret; +} + +/** + * emac_ndo_open - EMAC device open + * @ndev: network adapter device + * + * Called when system wants to start the interface. + * + * Return: 0 for a successful open, or appropriate error code + */ +static int emac_ndo_open(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + int num_data_chn = emac->tx_ch_num - 1; + struct prueth *prueth = emac->prueth; + int slice = prueth_emac_slice(emac); + struct device *dev = prueth->dev; + int max_rx_flows, rx_flow; + int ret, i; + + /* clear SMEM and MSMC settings for all slices */ + if (!prueth->emacs_initialized) { + memset_io(prueth->msmcram.va, 0, prueth->msmcram.size); + memset_io(prueth->shram.va, 0, ICSSG_CONFIG_OFFSET_SLICE1 * PRUETH_NUM_MACS); + } + + /* set h/w MAC as user might have re-configured */ + ether_addr_copy(emac->mac_addr, ndev->dev_addr); + + icssg_class_set_mac_addr(prueth->miig_rt, slice, emac->mac_addr); + + icssg_class_default(prueth->miig_rt, slice, 0, true); + + /* Notify the stack of the actual queue counts. */ + ret = netif_set_real_num_tx_queues(ndev, num_data_chn); + if (ret) { + dev_err(dev, "cannot set real number of tx queues\n"); + return ret; + } + + init_completion(&emac->cmd_complete); + ret = prueth_init_tx_chns(emac); + if (ret) { + dev_err(dev, "failed to init tx channel: %d\n", ret); + return ret; + } + + max_rx_flows = PRUETH_MAX_RX_FLOWS_SR1; + ret = prueth_init_rx_chns(emac, &emac->rx_chns, "rx", + max_rx_flows, PRUETH_MAX_RX_DESC); + if (ret) { + dev_err(dev, "failed to init rx channel: %d\n", ret); + goto cleanup_tx; + } + + ret = prueth_init_rx_chns(emac, &emac->rx_mgm_chn, "rxmgm", + PRUETH_MAX_RX_MGM_FLOWS_SR1, + PRUETH_MAX_RX_MGM_DESC_SR1); + if (ret) { + dev_err(dev, "failed to init rx mgmt channel: %d\n", + ret); + goto cleanup_rx; + } + + ret = prueth_ndev_add_tx_napi(emac); + if (ret) + goto cleanup_rx_mgm; + + /* we use only the highest priority flow for now i.e. 
@irq[3] */ + rx_flow = PRUETH_RX_FLOW_DATA_SR1; + ret = request_irq(emac->rx_chns.irq[rx_flow], prueth_rx_irq, + IRQF_TRIGGER_HIGH, dev_name(dev), emac); + if (ret) { + dev_err(dev, "unable to request RX IRQ\n"); + goto cleanup_napi; + } + + ret = request_threaded_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_RESPONSE_SR1], + NULL, prueth_rx_mgm_rsp_thread, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + dev_name(dev), emac); + if (ret) { + dev_err(dev, "unable to request RX Management RSP IRQ\n"); + goto free_rx_irq; + } + + ret = request_threaded_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1], + NULL, prueth_rx_mgm_ts_thread_sr1, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + dev_name(dev), emac); + if (ret) { + dev_err(dev, "unable to request RX Management TS IRQ\n"); + goto free_rx_mgm_rsp_irq; + } + + /* reset and start PRU firmware */ + ret = prueth_emac_start(prueth, emac); + if (ret) + goto free_rx_mgmt_ts_irq; + + icssg_mii_update_mtu(prueth->mii_rt, slice, ndev->max_mtu); + + /* Prepare RX */ + ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); + if (ret) + goto stop; + + ret = prueth_prepare_rx_chan(emac, &emac->rx_mgm_chn, 64); + if (ret) + goto reset_rx_chn; + + ret = k3_udma_glue_enable_rx_chn(emac->rx_mgm_chn.rx_chn); + if (ret) + goto reset_rx_chn; + + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); + if (ret) + goto reset_rx_mgm_chn; + + for (i = 0; i < emac->tx_ch_num; i++) { + ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); + if (ret) + goto reset_tx_chan; + } + + /* Enable NAPI in Tx and Rx direction */ + for (i = 0; i < emac->tx_ch_num; i++) + napi_enable(&emac->tx_chns[i].napi_tx); + napi_enable(&emac->napi_rx); + + /* start PHY */ + phy_start(ndev->phydev); + + prueth->emacs_initialized++; + + queue_work(system_long_wq, &emac->stats_work.work); + + return 0; + +reset_tx_chan: + /* Since interface is not yet up, there is wouldn't be + * any SKB for completion. So set false to free_skb + */ + prueth_reset_tx_chan(emac, i, false); +reset_rx_mgm_chn: + prueth_reset_rx_chan(&emac->rx_mgm_chn, + PRUETH_MAX_RX_MGM_FLOWS_SR1, true); +reset_rx_chn: + prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false); +stop: + prueth_emac_stop(emac); +free_rx_mgmt_ts_irq: + free_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1], + emac); +free_rx_mgm_rsp_irq: + free_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_RESPONSE_SR1], + emac); +free_rx_irq: + free_irq(emac->rx_chns.irq[rx_flow], emac); +cleanup_napi: + prueth_ndev_del_tx_napi(emac, emac->tx_ch_num); +cleanup_rx_mgm: + prueth_cleanup_rx_chns(emac, &emac->rx_mgm_chn, + PRUETH_MAX_RX_MGM_FLOWS_SR1); +cleanup_rx: + prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows); +cleanup_tx: + prueth_cleanup_tx_chns(emac); + + return ret; +} + +/** + * emac_ndo_stop - EMAC device stop + * @ndev: network adapter device + * + * Called when system wants to stop or down the interface. + * + * Return: Always 0 (Success) + */ +static int emac_ndo_stop(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + int rx_flow = PRUETH_RX_FLOW_DATA_SR1; + struct prueth *prueth = emac->prueth; + int max_rx_flows; + int ret, i; + + /* inform the upper layers. 
*/ + netif_tx_stop_all_queues(ndev); + + /* block packets from wire */ + if (ndev->phydev) + phy_stop(ndev->phydev); + + icssg_class_disable(prueth->miig_rt, prueth_emac_slice(emac)); + + emac_send_command_sr1(emac, ICSSG_SHUTDOWN_CMD_SR1); + + atomic_set(&emac->tdown_cnt, emac->tx_ch_num); + /* ensure new tdown_cnt value is visible */ + smp_mb__after_atomic(); + /* tear down and disable UDMA channels */ + reinit_completion(&emac->tdown_complete); + for (i = 0; i < emac->tx_ch_num; i++) + k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); + + ret = wait_for_completion_timeout(&emac->tdown_complete, + msecs_to_jiffies(1000)); + if (!ret) + netdev_err(ndev, "tx teardown timeout\n"); + + prueth_reset_tx_chan(emac, emac->tx_ch_num, true); + for (i = 0; i < emac->tx_ch_num; i++) + napi_disable(&emac->tx_chns[i].napi_tx); + + max_rx_flows = PRUETH_MAX_RX_FLOWS_SR1; + k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); + + prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); + /* Teardown RX MGM channel */ + k3_udma_glue_tdown_rx_chn(emac->rx_mgm_chn.rx_chn, true); + prueth_reset_rx_chan(&emac->rx_mgm_chn, + PRUETH_MAX_RX_MGM_FLOWS_SR1, true); + + napi_disable(&emac->napi_rx); + + /* Destroying the queued work in ndo_stop() */ + cancel_delayed_work_sync(&emac->stats_work); + + /* stop PRUs */ + prueth_emac_stop(emac); + + free_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_TIMESTAMP_SR1], emac); + free_irq(emac->rx_mgm_chn.irq[PRUETH_RX_MGM_FLOW_RESPONSE_SR1], emac); + free_irq(emac->rx_chns.irq[rx_flow], emac); + prueth_ndev_del_tx_napi(emac, emac->tx_ch_num); + prueth_cleanup_tx_chns(emac); + + prueth_cleanup_rx_chns(emac, &emac->rx_mgm_chn, PRUETH_MAX_RX_MGM_FLOWS_SR1); + prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows); + + prueth->emacs_initialized--; + + return 0; +} + +static void emac_ndo_set_rx_mode_sr1(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + bool allmulti = ndev->flags & IFF_ALLMULTI; + bool promisc = ndev->flags & IFF_PROMISC; + struct prueth *prueth = emac->prueth; + int slice = prueth_emac_slice(emac); + + if (promisc) { + icssg_class_promiscuous_sr1(prueth->miig_rt, slice); + return; + } + + if (allmulti) { + icssg_class_default(prueth->miig_rt, slice, 1, true); + return; + } + + icssg_class_default(prueth->miig_rt, slice, 0, true); + if (!netdev_mc_empty(ndev)) { + /* program multicast address list into Classifier */ + icssg_class_add_mcast_sr1(prueth->miig_rt, slice, ndev); + } +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_ndo_open, + .ndo_stop = emac_ndo_stop, + .ndo_start_xmit = emac_ndo_start_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = emac_ndo_tx_timeout, + .ndo_set_rx_mode = emac_ndo_set_rx_mode_sr1, + .ndo_eth_ioctl = emac_ndo_ioctl, + .ndo_get_stats64 = emac_ndo_get_stats64, + .ndo_get_phys_port_name = emac_ndo_get_phys_port_name, +}; + +static int prueth_netdev_init(struct prueth *prueth, + struct device_node *eth_node) +{ + struct prueth_emac *emac; + struct net_device *ndev; + enum prueth_port port; + enum prueth_mac mac; + /* Only enable one TX channel due to timeouts when + * using multiple channels */ + int num_tx_chn = 1; + int ret; + + port = prueth_node_port(eth_node); + if (port == PRUETH_PORT_INVALID) + return -EINVAL; + + mac = prueth_node_mac(eth_node); + if (mac == PRUETH_MAC_INVALID) + return -EINVAL; + + ndev = alloc_etherdev_mq(sizeof(*emac), num_tx_chn); + if (!ndev) + return -ENOMEM; + + emac = 
netdev_priv(ndev); + emac->is_sr1 = 1; + emac->prueth = prueth; + emac->ndev = ndev; + emac->port_id = port; + emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq"); + if (!emac->cmd_wq) { + ret = -ENOMEM; + goto free_ndev; + } + + INIT_DELAYED_WORK(&emac->stats_work, emac_stats_work_handler); + + ret = pruss_request_mem_region(prueth->pruss, + port == PRUETH_PORT_MII0 ? + PRUSS_MEM_DRAM0 : PRUSS_MEM_DRAM1, + &emac->dram); + if (ret) { + dev_err(prueth->dev, "unable to get DRAM: %d\n", ret); + ret = -ENOMEM; + goto free_wq; + } + + /* SR1.0 uses a dedicated high priority channel + * to send commands to the firmware + */ + emac->tx_ch_num = 2; + + SET_NETDEV_DEV(ndev, prueth->dev); + spin_lock_init(&emac->lock); + mutex_init(&emac->cmd_lock); + + emac->phy_node = of_parse_phandle(eth_node, "phy-handle", 0); + if (!emac->phy_node && !of_phy_is_fixed_link(eth_node)) { + dev_err(prueth->dev, "couldn't find phy-handle\n"); + ret = -ENODEV; + goto free; + } else if (of_phy_is_fixed_link(eth_node)) { + ret = of_phy_register_fixed_link(eth_node); + if (ret) { + ret = dev_err_probe(prueth->dev, ret, + "failed to register fixed-link phy\n"); + goto free; + } + + emac->phy_node = eth_node; + } + + ret = of_get_phy_mode(eth_node, &emac->phy_if); + if (ret) { + dev_err(prueth->dev, "could not get phy-mode property\n"); + goto free; + } + + if (emac->phy_if != PHY_INTERFACE_MODE_MII && + !phy_interface_mode_is_rgmii(emac->phy_if)) { + dev_err(prueth->dev, "PHY mode unsupported %s\n", phy_modes(emac->phy_if)); + ret = -EINVAL; + goto free; + } + + /* AM65 SR2.0 has TX Internal delay always enabled by hardware + * and it is not possible to disable TX Internal delay. The below + * switch case block describes how we handle different phy modes + * based on hardware restriction. 
+ */ + switch (emac->phy_if) { + case PHY_INTERFACE_MODE_RGMII_ID: + emac->phy_if = PHY_INTERFACE_MODE_RGMII_RXID; + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + emac->phy_if = PHY_INTERFACE_MODE_RGMII; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_RXID: + dev_err(prueth->dev, "RGMII mode without TX delay is not supported"); + ret = -EINVAL; + goto free; + default: + break; + } + + /* get mac address from DT and set private and netdev addr */ + ret = of_get_ethdev_address(eth_node, ndev); + if (!is_valid_ether_addr(ndev->dev_addr)) { + eth_hw_addr_random(ndev); + dev_warn(prueth->dev, "port %d: using random MAC addr: %pM\n", + port, ndev->dev_addr); + } + ether_addr_copy(emac->mac_addr, ndev->dev_addr); + + ndev->min_mtu = PRUETH_MIN_PKT_SIZE; + ndev->max_mtu = PRUETH_MAX_MTU; + ndev->netdev_ops = &emac_netdev_ops; + ndev->ethtool_ops = &icssg_ethtool_ops; + ndev->hw_features = NETIF_F_SG; + ndev->features = ndev->hw_features; + + netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll); + prueth->emac[mac] = emac; + + return 0; + +free: + pruss_release_mem_region(prueth->pruss, &emac->dram); +free_wq: + destroy_workqueue(emac->cmd_wq); +free_ndev: + emac->ndev = NULL; + prueth->emac[mac] = NULL; + free_netdev(ndev); + + return ret; +} + +static int prueth_probe(struct platform_device *pdev) +{ + struct device_node *eth_node, *eth_ports_node; + struct device_node *eth0_node = NULL; + struct device_node *eth1_node = NULL; + struct device *dev = &pdev->dev; + struct device_node *np; + struct prueth *prueth; + struct pruss *pruss; + u32 msmc_ram_size; + int i, ret; + + np = dev->of_node; + + prueth = devm_kzalloc(dev, sizeof(*prueth), GFP_KERNEL); + if (!prueth) + return -ENOMEM; + + dev_set_drvdata(dev, prueth); + prueth->pdev = pdev; + prueth->pdata = *(const struct prueth_pdata *)device_get_match_data(dev); + + prueth->dev = dev; + eth_ports_node = of_get_child_by_name(np, "ethernet-ports"); + if (!eth_ports_node) + return -ENOENT; + + for_each_child_of_node(eth_ports_node, eth_node) { + u32 reg; + + if (strcmp(eth_node->name, "port")) + continue; + ret = of_property_read_u32(eth_node, "reg", ®); + if (ret < 0) { + dev_err(dev, "%pOF error reading port_id %d\n", + eth_node, ret); + } + + of_node_get(eth_node); + + if (reg == 0) { + eth0_node = eth_node; + if (!of_device_is_available(eth0_node)) { + of_node_put(eth0_node); + eth0_node = NULL; + } + } else if (reg == 1) { + eth1_node = eth_node; + if (!of_device_is_available(eth1_node)) { + of_node_put(eth1_node); + eth1_node = NULL; + } + } else { + dev_err(dev, "port reg should be 0 or 1\n"); + } + } + + of_node_put(eth_ports_node); + + /* At least one node must be present and available else we fail */ + if (!eth0_node && !eth1_node) { + dev_err(dev, "neither port0 nor port1 node available\n"); + return -ENODEV; + } + + if (eth0_node == eth1_node) { + dev_err(dev, "port0 and port1 can't have same reg\n"); + of_node_put(eth0_node); + return -ENODEV; + } + + prueth->eth_node[PRUETH_MAC0] = eth0_node; + prueth->eth_node[PRUETH_MAC1] = eth1_node; + + prueth->miig_rt = syscon_regmap_lookup_by_phandle(np, "ti,mii-g-rt"); + if (IS_ERR(prueth->miig_rt)) { + dev_err(dev, "couldn't get ti,mii-g-rt syscon regmap\n"); + return -ENODEV; + } + + prueth->mii_rt = syscon_regmap_lookup_by_phandle(np, "ti,mii-rt"); + if (IS_ERR(prueth->mii_rt)) { + dev_err(dev, "couldn't get ti,mii-rt syscon regmap\n"); + return -ENODEV; + } + + if (eth0_node) { + ret = prueth_get_cores(prueth, ICSS_SLICE0, true); + if (ret) + goto put_cores; + 
} + + if (eth1_node) { + ret = prueth_get_cores(prueth, ICSS_SLICE1, true); + if (ret) + goto put_cores; + } + + pruss = pruss_get(eth0_node ? + prueth->pru[ICSS_SLICE0] : prueth->pru[ICSS_SLICE1]); + if (IS_ERR(pruss)) { + ret = PTR_ERR(pruss); + dev_err(dev, "unable to get pruss handle\n"); + goto put_cores; + } + + prueth->pruss = pruss; + + ret = pruss_request_mem_region(pruss, PRUSS_MEM_SHRD_RAM2, + &prueth->shram); + if (ret) { + dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret); + goto put_pruss; + } + + prueth->sram_pool = of_gen_pool_get(np, "sram", 0); + if (!prueth->sram_pool) { + dev_err(dev, "unable to get SRAM pool\n"); + ret = -ENODEV; + + goto put_mem; + } + + msmc_ram_size = MSMC_RAM_SIZE_SR1; + + prueth->msmcram.va = (void __iomem *)gen_pool_alloc(prueth->sram_pool, + msmc_ram_size); + + if (!prueth->msmcram.va) { + ret = -ENOMEM; + dev_err(dev, "unable to allocate MSMC resource\n"); + goto put_mem; + } + prueth->msmcram.pa = gen_pool_virt_to_phys(prueth->sram_pool, + (unsigned long)prueth->msmcram.va); + prueth->msmcram.size = msmc_ram_size; + memset_io(prueth->msmcram.va, 0, msmc_ram_size); + dev_dbg(dev, "sram: pa %llx va %p size %zx\n", prueth->msmcram.pa, + prueth->msmcram.va, prueth->msmcram.size); + + if (eth0_node) { + ret = prueth_netdev_init(prueth, eth0_node); + if (ret) { + dev_err_probe(dev, ret, "netdev init %s failed\n", + eth0_node->name); + goto free_pool; + } + + if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL)) + prueth->emac[PRUETH_MAC0]->half_duplex = 1; + } + + if (eth1_node) { + ret = prueth_netdev_init(prueth, eth1_node); + if (ret) { + dev_err_probe(dev, ret, "netdev init %s failed\n", + eth1_node->name); + goto netdev_exit; + } + + if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL)) + prueth->emac[PRUETH_MAC1]->half_duplex = 1; + } + + /* register the network devices */ + if (eth0_node) { + ret = register_netdev(prueth->emac[PRUETH_MAC0]->ndev); + if (ret) { + dev_err(dev, "can't register netdev for port MII0\n"); + goto netdev_exit; + } + + prueth->registered_netdevs[PRUETH_MAC0] = prueth->emac[PRUETH_MAC0]->ndev; + emac_phy_connect(prueth->emac[PRUETH_MAC0]); + phy_attached_info(prueth->emac[PRUETH_MAC0]->ndev->phydev); + } + + if (eth1_node) { + ret = register_netdev(prueth->emac[PRUETH_MAC1]->ndev); + if (ret) { + dev_err(dev, "can't register netdev for port MII1\n"); + goto netdev_unregister; + } + + prueth->registered_netdevs[PRUETH_MAC1] = prueth->emac[PRUETH_MAC1]->ndev; + emac_phy_connect(prueth->emac[PRUETH_MAC1]); + phy_attached_info(prueth->emac[PRUETH_MAC1]->ndev->phydev); + } + + dev_info(dev, "TI PRU SR1.0 ethernet driver initialized: %s EMAC mode\n", + (!eth0_node || !eth1_node) ? 
"single" : "dual"); + + if (eth1_node) + of_node_put(eth1_node); + if (eth0_node) + of_node_put(eth0_node); + + return 0; + +netdev_unregister: + for (i = 0; i < PRUETH_NUM_MACS; i++) { + if (!prueth->registered_netdevs[i]) + continue; + + if (prueth->emac[i]->ndev->phydev) { + phy_disconnect(prueth->emac[i]->ndev->phydev); + prueth->emac[i]->ndev->phydev = NULL; + } + unregister_netdev(prueth->registered_netdevs[i]); + } + +netdev_exit: + for (i = 0; i < PRUETH_NUM_MACS; i++) { + eth_node = prueth->eth_node[i]; + if (!eth_node) + continue; + + prueth_netdev_exit(prueth, eth_node); + } + +free_pool: + gen_pool_free(prueth->sram_pool, + (unsigned long)prueth->msmcram.va, msmc_ram_size); + +put_mem: + pruss_release_mem_region(prueth->pruss, &prueth->shram); + +put_pruss: + pruss_put(prueth->pruss); + +put_cores: + if (eth1_node) { + prueth_put_cores(prueth, ICSS_SLICE1); + of_node_put(eth1_node); + } + + if (eth0_node) { + prueth_put_cores(prueth, ICSS_SLICE0); + of_node_put(eth0_node); + } + + return ret; +} + +static void prueth_remove(struct platform_device *pdev) +{ + struct prueth *prueth = platform_get_drvdata(pdev); + struct device_node *eth_node; + int i; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + if (!prueth->registered_netdevs[i]) + continue; + phy_stop(prueth->emac[i]->ndev->phydev); + phy_disconnect(prueth->emac[i]->ndev->phydev); + prueth->emac[i]->ndev->phydev = NULL; + unregister_netdev(prueth->registered_netdevs[i]); + } + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + eth_node = prueth->eth_node[i]; + if (!eth_node) + continue; + + prueth_netdev_exit(prueth, eth_node); + } + + gen_pool_free(prueth->sram_pool, + (unsigned long)prueth->msmcram.va, + MSMC_RAM_SIZE_SR1); + + pruss_release_mem_region(prueth->pruss, &prueth->shram); + + pruss_put(prueth->pruss); + + if (prueth->eth_node[PRUETH_MAC1]) + prueth_put_cores(prueth, ICSS_SLICE1); + + if (prueth->eth_node[PRUETH_MAC0]) + prueth_put_cores(prueth, ICSS_SLICE0); +} + +static const struct prueth_pdata am654_sr1_icssg_pdata = { + .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, +}; + +static const struct of_device_id prueth_dt_match[] = { + { .compatible = "ti,am654-sr1-icssg-prueth", .data = &am654_sr1_icssg_pdata }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, prueth_dt_match); + +static struct platform_driver prueth_driver = { + .probe = prueth_probe, + .remove_new = prueth_remove, + .driver = { + .name = "icssg-prueth-sr1", + .of_match_table = prueth_dt_match, + .pm = &prueth_dev_pm_ops, + }, +}; +module_platform_driver(prueth_driver); + +MODULE_AUTHOR("Roger Quadros "); +MODULE_AUTHOR("Md Danish Anwar "); +MODULE_AUTHOR("Diogo Ivo "); +MODULE_DESCRIPTION(PRUETH_MODULE_DESCRIPTION); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 48ba00da2eb4b54a7e6ed2ca3a9f2e575dff48c9 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 5 Apr 2024 09:44:49 +0200 Subject: net: sparx5: add support for tc flower mirred action. Add support for tc flower mirred action. Two VCAP actions are encoded in the rule - one for the port mask, and one for the port mask mode. When the rule is hit, the destination mask is OR'ed with the port mask. Also add new VCAP function for supporting 72-bit wide actions, and a tc helper for setting the port forwarding mask. 
Signed-off-by: Daniel Machon Signed-off-by: Paolo Abeni --- .../ethernet/microchip/sparx5/sparx5_tc_flower.c | 39 ++++++++++++++++++++++ drivers/net/ethernet/microchip/vcap/vcap_api.c | 12 +++++++ .../net/ethernet/microchip/vcap/vcap_api_client.h | 2 ++ 3 files changed, 53 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index 523e0c470894..fb2e3004183a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -1004,6 +1004,40 @@ static int sparx5_tc_action_vlan_push(struct vcap_admin *admin, return err; } +static void sparx5_tc_flower_set_port_mask(struct vcap_u72_action *ports, + struct net_device *ndev) +{ + struct sparx5_port *port = netdev_priv(ndev); + int byidx = port->portno / BITS_PER_BYTE; + int biidx = port->portno % BITS_PER_BYTE; + + ports->value[byidx] |= BIT(biidx); +} + +static int sparx5_tc_action_mirred(struct vcap_admin *admin, + struct vcap_rule *vrule, + struct flow_cls_offload *fco, + struct flow_action_entry *act) +{ + struct vcap_u72_action ports = {0}; + int err; + + if (admin->vtype != VCAP_TYPE_IS0 && admin->vtype != VCAP_TYPE_IS2) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "Mirror action not supported in this VCAP"); + return -EOPNOTSUPP; + } + + err = vcap_rule_add_action_u32(vrule, VCAP_AF_MASK_MODE, + SPX5_PMM_OR_DSTMASK); + if (err) + return err; + + sparx5_tc_flower_set_port_mask(&ports, act->dev); + + return vcap_rule_add_action_u72(vrule, VCAP_AF_PORT_MASK, &ports); +} + /* Remove rule keys that may prevent templates from matching a keyset */ static void sparx5_tc_flower_simplify_rule(struct vcap_admin *admin, struct vcap_rule *vrule, @@ -1150,6 +1184,11 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, if (err) goto out; break; + case FLOW_ACTION_MIRRED: + err = sparx5_tc_action_mirred(admin, vrule, fco, act); + if (err) + goto out; + break; case FLOW_ACTION_ACCEPT: err = sparx5_tc_set_actionset(admin, vrule); if (err) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index ef980e4e5bc2..80ae5e1708a6 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -2907,6 +2907,18 @@ int vcap_rule_add_action_u32(struct vcap_rule *rule, } EXPORT_SYMBOL_GPL(vcap_rule_add_action_u32); +/* Add a 72 bit action field with value to the rule */ +int vcap_rule_add_action_u72(struct vcap_rule *rule, + enum vcap_action_field action, + struct vcap_u72_action *fieldval) +{ + struct vcap_client_actionfield_data data; + + memcpy(&data.u72, fieldval, sizeof(data.u72)); + return vcap_rule_add_action(rule, action, VCAP_FIELD_U72, &data); +} +EXPORT_SYMBOL_GPL(vcap_rule_add_action_u72); + static int vcap_read_counter(struct vcap_rule_internal *ri, struct vcap_counter *ctr) { diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h index 88641508f885..56874f2adbba 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h @@ -200,6 +200,8 @@ int vcap_rule_add_action_bit(struct vcap_rule *rule, enum vcap_action_field action, enum vcap_bit val); int vcap_rule_add_action_u32(struct vcap_rule *rule, enum vcap_action_field action, u32 value); +int vcap_rule_add_action_u72(struct vcap_rule *rule, enum vcap_action_field 
action, + struct vcap_u72_action *fieldval); /* Get number of rules in a vcap instance lookup chain id range */ int vcap_admin_rule_count(struct vcap_admin *admin, int cid); -- cgit v1.2.3 From 1164b8e0b108507b41e2564a9461bc0a6e38283c Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 5 Apr 2024 09:44:50 +0200 Subject: net: sparx5: add support for tc flower redirect action Add support for the flower redirect action. Two VCAP actions are encoded in the rule - one for the port mask, and one for the port mask mode. When the rule is hit, the port mask is used as the final destination set, replacing all other port masks. Signed-off-by: Daniel Machon Signed-off-by: Paolo Abeni --- .../ethernet/microchip/sparx5/sparx5_tc_flower.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index fb2e3004183a..82dd270adae6 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -1038,6 +1038,30 @@ static int sparx5_tc_action_mirred(struct vcap_admin *admin, return vcap_rule_add_action_u72(vrule, VCAP_AF_PORT_MASK, &ports); } +static int sparx5_tc_action_redirect(struct vcap_admin *admin, + struct vcap_rule *vrule, + struct flow_cls_offload *fco, + struct flow_action_entry *act) +{ + struct vcap_u72_action ports = {0}; + int err; + + if (admin->vtype != VCAP_TYPE_IS0 && admin->vtype != VCAP_TYPE_IS2) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "Redirect action not supported in this VCAP"); + return -EOPNOTSUPP; + } + + err = vcap_rule_add_action_u32(vrule, VCAP_AF_MASK_MODE, + SPX5_PMM_REPLACE_ALL); + if (err) + return err; + + sparx5_tc_flower_set_port_mask(&ports, act->dev); + + return vcap_rule_add_action_u72(vrule, VCAP_AF_PORT_MASK, &ports); +} + /* Remove rule keys that may prevent templates from matching a keyset */ static void sparx5_tc_flower_simplify_rule(struct vcap_admin *admin, struct vcap_rule *vrule, @@ -1189,6 +1213,11 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, if (err) goto out; break; + case FLOW_ACTION_REDIRECT: + err = sparx5_tc_action_redirect(admin, vrule, fco, act); + if (err) + goto out; + break; case FLOW_ACTION_ACCEPT: err = sparx5_tc_set_actionset(admin, vrule); if (err) -- cgit v1.2.3 From 545d95e5f1ba87db17534ee8c36409dd2ade848b Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 8 Apr 2024 16:55:04 +0000 Subject: cxgb4: flower: use NL_SET_ERR_MSG_MOD for validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace netdev_{warn,err} with NL_SET_ERR_MSG_{FMT_,}MOD to better inform the user about the problem. Only compile-tested, no access to HW. 
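The practical difference is where the text ends up: netdev_err()/netdev_warn() only reach the kernel log, while an extack message travels back over netlink so the tc user sees the reason directly. A minimal sketch of the pattern used in cxgb4_validate_flow_match() below; the helper and its arguments are made up for illustration:

  #include <linux/errno.h>
  #include <linux/netlink.h>
  #include <linux/types.h>

  /* Hypothetical validation helper: the failure reason is reported through
   * extack to the netlink caller instead of being logged only to dmesg.
   */
  static int example_validate_keys(struct netlink_ext_ack *extack,
                                   u64 used_keys, u64 supported_keys)
  {
          if (used_keys & ~supported_keys) {
                  NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported key used: 0x%llx",
                                         used_keys & ~supported_keys);
                  return -EOPNOTSUPP;
          }
          return 0;
  }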
Signed-off-by: Asbjørn Sloth Tønnesen Link: https://lore.kernel.org/r/20240408165506.94483-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 64 ++++++++++------------ 1 file changed, 30 insertions(+), 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 72ac4a34424b..3a6987cafe59 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -305,7 +305,7 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->mask.iport = ~0; } -static int cxgb4_validate_flow_match(struct net_device *dev, +static int cxgb4_validate_flow_match(struct netlink_ext_ack *extack, struct flow_rule *rule) { struct flow_dissector *dissector = rule->match.dissector; @@ -321,8 +321,9 @@ static int cxgb4_validate_flow_match(struct net_device *dev, BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | BIT_ULL(FLOW_DISSECTOR_KEY_IP))) { - netdev_warn(dev, "Unsupported key used: 0x%llx\n", - dissector->used_keys); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported key used: 0x%llx", + dissector->used_keys); return -EOPNOTSUPP; } @@ -339,13 +340,15 @@ static int cxgb4_validate_flow_match(struct net_device *dev, struct flow_match_ip match; if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) { - netdev_err(dev, "IP Key supported only with IPv4/v6"); + NL_SET_ERR_MSG_MOD(extack, + "IP Key supported only with IPv4/v6"); return -EINVAL; } flow_rule_match_ip(rule, &match); if (match.mask->ttl) { - netdev_warn(dev, "ttl match unsupported for offload"); + NL_SET_ERR_MSG_MOD(extack, + "ttl match unsupported for offload"); return -EOPNOTSUPP; } } @@ -576,7 +579,7 @@ static bool valid_l4_mask(u32 mask) return hi && lo ? 
false : true; } -static bool valid_pedit_action(struct net_device *dev, +static bool valid_pedit_action(struct netlink_ext_ack *extack, const struct flow_action_entry *act, u8 *natmode_flags) { @@ -595,8 +598,7 @@ static bool valid_pedit_action(struct net_device *dev, case PEDIT_ETH_SMAC_47_16: break; default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit field"); return false; } break; @@ -609,8 +611,7 @@ static bool valid_pedit_action(struct net_device *dev, *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit field"); return false; } break; @@ -629,8 +630,7 @@ static bool valid_pedit_action(struct net_device *dev, *natmode_flags |= CXGB4_ACTION_NATMODE_DIP; break; default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit field"); return false; } break; @@ -638,8 +638,8 @@ static bool valid_pedit_action(struct net_device *dev, switch (offset) { case PEDIT_TCP_SPORT_DPORT: if (!valid_l4_mask(~mask)) { - netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, + "Unsupported mask for TCP L4 ports"); return false; } if (~mask & PEDIT_TCP_UDP_SPORT_MASK) @@ -648,8 +648,7 @@ static bool valid_pedit_action(struct net_device *dev, *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit field"); return false; } break; @@ -657,8 +656,8 @@ static bool valid_pedit_action(struct net_device *dev, switch (offset) { case PEDIT_UDP_SPORT_DPORT: if (!valid_l4_mask(~mask)) { - netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, + "Unsupported mask for UDP L4 ports"); return false; } if (~mask & PEDIT_TCP_UDP_SPORT_MASK) @@ -667,13 +666,12 @@ static bool valid_pedit_action(struct net_device *dev, *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT; break; default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit field"); return false; } break; default: - netdev_err(dev, "%s: Unsupported pedit type\n", __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit type"); return false; } return true; @@ -727,8 +725,7 @@ int cxgb4_validate_flow_actions(struct net_device *dev, * the provided output port is not valid */ if (!found) { - netdev_err(dev, "%s: Out port invalid\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, "Out port invalid"); return -EINVAL; } act_redir = true; @@ -745,21 +742,21 @@ int cxgb4_validate_flow_actions(struct net_device *dev, case FLOW_ACTION_VLAN_PUSH: case FLOW_ACTION_VLAN_MANGLE: if (proto != ETH_P_8021Q) { - netdev_err(dev, "%s: Unsupported vlan proto\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, + "Unsupported vlan proto"); return -EOPNOTSUPP; } break; default: - netdev_err(dev, "%s: Unsupported vlan action\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, + "Unsupported vlan action"); return -EOPNOTSUPP; } act_vlan = true; } break; case FLOW_ACTION_MANGLE: { - bool pedit_valid = valid_pedit_action(dev, act, + bool pedit_valid = valid_pedit_action(extack, act, &natmode_flags); if (!pedit_valid) @@ -771,14 +768,14 @@ int cxgb4_validate_flow_actions(struct net_device *dev, /* Do nothing. 
cxgb4_set_filter will validate */ break; default: - netdev_err(dev, "%s: Unsupported action\n", __func__); + NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); return -EOPNOTSUPP; } } if ((act_pedit || act_vlan) && !act_redir) { - netdev_err(dev, "%s: pedit/vlan rewrite invalid without egress redirect\n", - __func__); + NL_SET_ERR_MSG_MOD(extack, + "pedit/vlan rewrite invalid without egress redirect"); return -EINVAL; } @@ -864,7 +861,7 @@ int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, if (cxgb4_validate_flow_actions(dev, &rule->action, extack, 0)) return -EOPNOTSUPP; - if (cxgb4_validate_flow_match(dev, rule)) + if (cxgb4_validate_flow_match(extack, rule)) return -EOPNOTSUPP; cxgb4_process_flow_match(dev, rule, fs); @@ -901,8 +898,7 @@ int cxgb4_flow_rule_replace(struct net_device *dev, struct flow_rule *rule, init_completion(&ctx.completion); ret = __cxgb4_set_filter(dev, fidx, fs, &ctx); if (ret) { - netdev_err(dev, "%s: filter creation err %d\n", - __func__, ret); + NL_SET_ERR_MSG_FMT_MOD(extack, "filter creation err %d", ret); return ret; } -- cgit v1.2.3 From 930fd7fe10d977ef880654e926b3a2c3dd52c657 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 9 Apr 2024 15:22:14 +0200 Subject: mlxsw: spectrum_ethtool: Add support for 100Gb/s per lane link modes The Spectrum-4 ASIC supports 100Gb/s per lane link modes, but the only one currently supported by the driver is 800Gb/s over eight lanes. Add support for 100Gb/s over one lane, 200Gb/s over two lanes and 400Gb/s over four lanes. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/1d77830f6abcc4f0d57a7f845e5a6d97a75a434b.1712667750.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 3 ++ .../net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 60 ++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8892654c685f..8adf86a6f5cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4786,8 +4786,11 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); #define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR BIT(8) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4 BIT(9) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2 BIT(10) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_1_100GBASE_CR_KR BIT(11) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4 BIT(12) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_2_200GBASE_CR2_KR2 BIT(13) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8 BIT(15) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_4_400GBASE_CR4_KR4 BIT(16) #define MLXSW_REG_PTYS_EXT_ETH_SPEED_800GAUI_8 BIT(19) /* reg_ptys_ext_eth_proto_cap diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 0f29e9c19411..a755b0a901d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1648,6 +1648,18 @@ mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2[] = { #define MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2) +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_100gaui_1_100gbase_cr_kr[] = { + 
ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_100GAUI_1_100GBASE_CR_KR_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_100gaui_1_100gbase_cr_kr) + static const enum ethtool_link_mode_bit_indices mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, @@ -1660,6 +1672,18 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_200gaui_2_200gbase_cr2_kr2[] = { + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_200GAUI_2_200GBASE_CR2_KR2_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_2_200gbase_cr2_kr2) + static const enum ethtool_link_mode_bit_indices mlxsw_sp2_mask_ethtool_400gaui_8[] = { ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT, @@ -1672,6 +1696,18 @@ mlxsw_sp2_mask_ethtool_400gaui_8[] = { #define MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_8) +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_400gaui_4_400gbase_cr4_kr4[] = { + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_400GAUI_4_400GBASE_CR4_KR4_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_400gaui_4_400gbase_cr4_kr4) + static const enum ethtool_link_mode_bit_indices mlxsw_sp2_mask_ethtool_800gaui_8[] = { ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT, @@ -1816,6 +1852,14 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .speed = SPEED_100000, .width = 2, }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_1_100GBASE_CR_KR, + .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_1_100gbase_cr_kr, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_1_100GBASE_CR_KR_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X, + .speed = SPEED_100000, + .width = 1, + }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, @@ -1825,6 +1869,14 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .speed = SPEED_200000, .width = 4, }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_2_200GBASE_CR2_KR2, + .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_2_200gbase_cr2_kr2, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_2_200GBASE_CR2_KR2_LEN, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_2X, + .speed = SPEED_200000, + .width = 2, + }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8, .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_8, @@ -1833,6 +1885,14 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .speed = SPEED_400000, .width = 8, }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_4_400GBASE_CR4_KR4, + .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_4_400gbase_cr4_kr4, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_400GAUI_4_400GBASE_CR4_KR4_LEN, + .mask_sup_width = 
MLXSW_SP_PORT_MASK_WIDTH_4X, + .speed = SPEED_400000, + .width = 4, + }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_800GAUI_8, .mask_ethtool = mlxsw_sp2_mask_ethtool_800gaui_8, -- cgit v1.2.3 From 17b0dfa1f35bf58c17ae75da4af99e6b2c51ed57 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Tue, 9 Apr 2024 14:54:25 -0700 Subject: bnxt_en: Skip ethtool RSS context configuration in ifdown state The current implementation requires the ifstate to be up when configuring the RSS contexts. It will try to fetch the RX ring IDs and will crash if it is in ifdown state. Return error if !netif_running() to prevent the crash. An improved implementation is in the works to allow RSS contexts to be changed while in ifdown state. Fixes: b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()") Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9c49f629d565..68444234b268 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1876,6 +1876,11 @@ static int bnxt_set_rxfh_context(struct bnxt *bp, return -EOPNOTSUPP; } + if (!netif_running(bp->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to set RSS contexts when interface is down"); + return -EAGAIN; + } + if (*rss_context != ETH_RXFH_CONTEXT_ALLOC) { rss_ctx = bnxt_get_rss_ctx_from_index(bp, *rss_context); if (!rss_ctx) { -- cgit v1.2.3 From 43226dccd1bd74c6bf97d8d357e782f4922d39e3 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 9 Apr 2024 14:54:26 -0700 Subject: bnxt_en: Remove a redundant NULL check in bnxt_register_dev() The memory for "edev->ulp_tbl" is allocated inside the bnxt_rdma_aux_device_init() function. If it fails, the driver will not create the auxiliary device for RoCE. Hence the NULL check inside bnxt_register_dev() is unnecessary. Reviewed-by: Vikas Gupta Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 86dcd2c76587..fd890819d4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -64,9 +64,6 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, return -ENOMEM; ulp = edev->ulp_tbl; - if (!ulp) - return -ENOMEM; - ulp->handle = handle; rcu_assign_pointer(ulp->ulp_ops, ulp_ops); -- cgit v1.2.3 From b58f5a9c7034e02c391f4ae70fc62c927ce8f04f Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Tue, 9 Apr 2024 14:54:27 -0700 Subject: bnxt_en: Remove unneeded MSIX base structure fields and code Ever since commit: 303432211324 ("bnxt_en: Remove runtime interrupt vector allocation") The MSIX base vector is effectively always 0. Remove all unneeded structure fields and code referencing the MSIX base. 
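To see why this is a pure simplification, compare the old per-ring vector mapping in the bnxt_alloc_cp_rings() hunk below with the base pinned at 0: both branches reduce to the same expression. A standalone sketch of the equivalence, not the driver's code:

  /* Old mapping, assuming ulp_base_vec is always 0: */
  static int map_idx_old(int i, int ulp_msix, int ulp_base_vec)
  {
          if (ulp_msix && i >= ulp_base_vec)   /* always taken when ulp_msix != 0 */
                  return i + ulp_msix;
          return i;                            /* only reached when ulp_msix == 0 */
  }

  /* New mapping: the same result for every i once ulp_base_vec == 0. */
  static int map_idx_new(int i, int ulp_msix)
  {
          return ulp_msix + i;
  }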
Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++----------------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 20 ++++------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 2 -- 3 files changed, 8 insertions(+), 45 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 795f3f957eb5..8213a37cba3e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3905,13 +3905,12 @@ static int bnxt_alloc_cp_sub_ring(struct bnxt *bp, static int bnxt_alloc_cp_rings(struct bnxt *bp) { bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS); - int i, j, rc, ulp_base_vec, ulp_msix; + int i, j, rc, ulp_msix; int tcs = bp->num_tc; if (!tcs) tcs = 1; ulp_msix = bnxt_get_ulp_msix_num(bp); - ulp_base_vec = bnxt_get_ulp_msix_base(bp); for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr, *cpr2; @@ -3930,10 +3929,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp) if (rc) return rc; - if (ulp_msix && i >= ulp_base_vec) - ring->map_idx = i + ulp_msix; - else - ring->map_idx = i; + ring->map_idx = ulp_msix + i; if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; @@ -7347,17 +7343,7 @@ static int bnxt_hwrm_reserve_rings(struct bnxt *bp, struct bnxt_hw_rings *hwr) int bnxt_nq_rings_in_use(struct bnxt *bp) { - int cp = bp->cp_nr_rings; - int ulp_msix, ulp_base; - - ulp_msix = bnxt_get_ulp_msix_num(bp); - if (ulp_msix) { - ulp_base = bnxt_get_ulp_msix_base(bp); - cp += ulp_msix; - if ((ulp_base + ulp_msix) > cp) - cp = ulp_base + ulp_msix; - } - return cp; + return bp->cp_nr_rings + bnxt_get_ulp_msix_num(bp); } static int bnxt_cp_rings_in_use(struct bnxt *bp) @@ -7373,16 +7359,7 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) static int bnxt_get_func_stat_ctxs(struct bnxt *bp) { - int ulp_stat = bnxt_get_ulp_stat_ctxs(bp); - int cp = bp->cp_nr_rings; - - if (!ulp_stat) - return cp; - - if (bnxt_nq_rings_in_use(bp) > cp + bnxt_get_ulp_msix_num(bp)) - return bnxt_get_ulp_msix_base(bp) + ulp_stat; - - return cp + ulp_stat; + return bp->cp_nr_rings + bnxt_get_ulp_stat_ctxs(bp); } static int bnxt_get_total_rss_ctxs(struct bnxt *bp, struct bnxt_hw_rings *hwr) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index fd890819d4bc..cae88747b110 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -31,21 +31,20 @@ static DEFINE_IDA(bnxt_aux_dev_ids); static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent) { struct bnxt_en_dev *edev = bp->edev; - int num_msix, idx, i; + int num_msix, i; if (!edev->ulp_tbl->msix_requested) { netdev_warn(bp->dev, "Requested MSI-X vectors insufficient\n"); return; } num_msix = edev->ulp_tbl->msix_requested; - idx = edev->ulp_tbl->msix_base; for (i = 0; i < num_msix; i++) { - ent[i].vector = bp->irq_tbl[idx + i].vector; - ent[i].ring_idx = idx + i; + ent[i].vector = bp->irq_tbl[i].vector; + ent[i].ring_idx = i; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ent[i].db_offset = bp->db_offset; else - ent[i].db_offset = (idx + i) * 0x80; + ent[i].db_offset = i * 0x80; } } @@ -111,17 +110,6 @@ int 
bnxt_get_ulp_msix_num(struct bnxt *bp) min_t(u32, roce_msix, num_online_cpus()) : 0); } -int bnxt_get_ulp_msix_base(struct bnxt *bp) -{ - if (bnxt_ulp_registered(bp->edev)) { - struct bnxt_en_dev *edev = bp->edev; - - if (edev->ulp_tbl->msix_requested) - return edev->ulp_tbl->msix_base; - } - return 0; -} - int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { if (bnxt_ulp_registered(bp->edev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index b9e73de14b57..f8df4095399f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -46,7 +46,6 @@ struct bnxt_ulp { unsigned long *async_events_bmap; u16 max_async_event_id; u16 msix_requested; - u16 msix_base; atomic_t ref_count; }; @@ -96,7 +95,6 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) } int bnxt_get_ulp_msix_num(struct bnxt *bp); -int bnxt_get_ulp_msix_base(struct bnxt *bp); int bnxt_get_ulp_stat_ctxs(struct bnxt *bp); void bnxt_ulp_stop(struct bnxt *bp); void bnxt_ulp_start(struct bnxt *bp, int err); -- cgit v1.2.3 From 194fad5b27815ca80e832ac875c0026ff96fc243 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Tue, 9 Apr 2024 14:54:28 -0700 Subject: bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions In its current form, bnxt_rdma_aux_device_init() not only initializes the necessary data structures of the newly created aux device but also adds the aux device into the aux bus subsytem. Refactor the logic into separate functions, first function to initialize the aux device along with the required resources and second, to actually add the device to the aux bus subsytem. This separation helps to create bnxt_en_dev much earlier and save its resources separately. Reviewed-by: Andy Gospodarek Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 33 ++++++++++++++++++++------- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 2 ++ 3 files changed, 35 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8213a37cba3e..8c24e83ba050 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14786,10 +14786,13 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) bnxt_sriov_disable(bp); - bnxt_rdma_aux_device_uninit(bp); + bnxt_rdma_aux_device_del(bp); bnxt_ptp_clear(bp); unregister_netdev(dev); + + bnxt_rdma_aux_device_uninit(bp); + bnxt_free_l2_filters(bp, true); bnxt_free_ntp_fltrs(bp, true); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) @@ -15408,13 +15411,15 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) bnxt_init_multi_rss_ctx(bp); + bnxt_rdma_aux_device_init(bp); + rc = register_netdev(dev); if (rc) goto init_err_cleanup; bnxt_dl_fw_reporters_create(bp); - bnxt_rdma_aux_device_init(bp); + bnxt_rdma_aux_device_add(bp); bnxt_print_device_info(bp); @@ -15422,6 +15427,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; init_err_cleanup: + bnxt_rdma_aux_device_uninit(bp); bnxt_dl_unregister(bp); init_err_dl: bnxt_shutdown_tc(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index cae88747b110..ae2b367b1226 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -291,7 +291,6 @@ void bnxt_rdma_aux_device_uninit(struct bnxt *bp) aux_priv = bp->aux_priv; adev = &aux_priv->aux_dev; - auxiliary_device_delete(adev); auxiliary_device_uninit(adev); } @@ -309,6 +308,14 @@ static void bnxt_aux_dev_release(struct device *dev) bp->aux_priv = NULL; } +void bnxt_rdma_aux_device_del(struct bnxt *bp) +{ + if (!bp->edev) + return; + + auxiliary_device_delete(&bp->aux_priv->aux_dev); +} + static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) { edev->net = bp->dev; @@ -332,6 +339,23 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); } +void bnxt_rdma_aux_device_add(struct bnxt *bp) +{ + struct auxiliary_device *aux_dev; + int rc; + + if (!bp->edev) + return; + + aux_dev = &bp->aux_priv->aux_dev; + rc = auxiliary_device_add(aux_dev); + if (rc) { + netdev_warn(bp->dev, "Failed to add auxiliary device for ROCE\n"); + auxiliary_device_uninit(aux_dev); + bp->flags &= ~BNXT_FLAG_ROCE_CAP; + } +} + void bnxt_rdma_aux_device_init(struct bnxt *bp) { struct auxiliary_device *aux_dev; @@ -386,13 +410,6 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp) bp->edev = edev; bnxt_set_edev_info(edev, bp); - rc = auxiliary_device_add(aux_dev); - if (rc) { - netdev_warn(bp->dev, - "Failed to add auxiliary device for ROCE\n"); - goto aux_dev_uninit; - } - return; aux_dev_uninit: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index f8df4095399f..ae7266ddf167 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -103,6 +103,8 @@ void bnxt_ulp_irq_stop(struct bnxt *bp); void bnxt_ulp_irq_restart(struct bnxt *bp, int err); void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl); void bnxt_rdma_aux_device_uninit(struct bnxt *bp); +void bnxt_rdma_aux_device_del(struct bnxt *bp); +void bnxt_rdma_aux_device_add(struct bnxt *bp); void bnxt_rdma_aux_device_init(struct bnxt *bp); int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops, void *handle); -- cgit v1.2.3 From 2e4592dc9bee5ca4fd3da4c5451c7f97c76442bf Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Tue, 9 Apr 2024 14:54:29 -0700 Subject: bnxt_en: Change MSIX/NQs allocation policy The existing scheme sets aside a number of MSIX/NQs for the RoCE driver whether the RoCE driver is registered or not. This scheme is not flexible and limits the resources available for the L2 rings if RoCE is never used. Modify the scheme so that the RoCE MSIX/NQs can be used by the L2 driver if they are not used for RoCE. The MSIX/NQs are now represented by 3 fields. bp->ulp_num_msix_want contains the desired default value, edev->ulp_num_msix_vec contains the available value (but not necessarily in use), and ulp_tbl->msix_requested contains the actual value in use by RoCE. The L2 driver can dip into edev->ulp_num_msix_vec if necessary. We need to add rtnl_lock() back in bnxt_register_dev() and bnxt_unregister_dev() to synchronize the MSIX usage between L2 and RoCE. 
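A rough sketch of how the three counters relate; the structure below is illustrative rather than the driver's definitions. The in-use count never exceeds what is available, and availability is clamped to the desired default after ring reservation:

  /* Illustrative only:
   *   in_use <= available <= want
   */
  struct ulp_msix_view {
          int want;        /* bp->ulp_num_msix_want: desired default       */
          int available;   /* edev->ulp_num_msix_vec: reserved, maybe idle */
          int in_use;      /* ulp_tbl->msix_requested: taken by RoCE, or 0 */
  };

  /* Mirrors the min_t() clamp applied after ring reservation. */
  static int clamp_available(int want, int resv_msix_left)
  {
          return resv_msix_left < want ? resv_msix_left : want;
  }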
Reviewed-by: Andy Gospodarek Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 77 +++++++++++++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 8 ++- 4 files changed, 92 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8c24e83ba050..88cf8f47e071 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7549,6 +7549,20 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); + if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { + int resv_msix, resv_ctx, ulp_msix, ulp_ctxs; + struct bnxt_hw_resc *hw_resc; + + hw_resc = &bp->hw_resc; + resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + ulp_msix = bnxt_get_ulp_msix_num(bp); + ulp_msix = min_t(int, resv_msix, ulp_msix); + bnxt_set_ulp_msix_num(bp, ulp_msix); + resv_ctx = hw_resc->resv_stat_ctxs - bp->cp_nr_rings; + ulp_ctxs = min(resv_ctx, bnxt_get_ulp_stat_ctxs(bp)); + bnxt_set_ulp_stat_ctxs(bp, ulp_ctxs); + } + return rc; } @@ -14982,6 +14996,7 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; + int avail_msix; if (!bnxt_can_reserve_rings(bp)) return 0; @@ -15009,6 +15024,14 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; + if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { + int ulp_num_msix = min(avail_msix, bp->ulp_num_msix_want); + + bnxt_set_ulp_msix_num(bp, ulp_num_msix); + bnxt_set_dflt_ulp_stat_ctxs(bp); + } + rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); @@ -15358,6 +15381,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); + bnxt_rdma_aux_device_init(bp); rc = bnxt_set_dflt_rings(bp, true); if (rc) { if (BNXT_VF(bp) && rc == -ENODEV) { @@ -15411,7 +15435,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) bnxt_init_multi_rss_ctx(bp); - bnxt_rdma_aux_device_init(bp); rc = register_netdev(dev); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0640fcb57ef8..ad57ef051798 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2303,6 +2303,7 @@ struct bnxt { struct bnxt_irq *irq_tbl; int total_irqs; + int ulp_num_msix_want; u8 mac_addr[ETH_ALEN]; #ifdef CONFIG_BNXT_DCB diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index ae2b367b1226..de2cb1d4cd98 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -48,6 +48,46 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent) } } +int bnxt_get_ulp_msix_num(struct 
bnxt *bp) +{ + if (bp->edev) + return bp->edev->ulp_num_msix_vec; + return 0; +} + +void bnxt_set_ulp_msix_num(struct bnxt *bp, int num) +{ + if (bp->edev) + bp->edev->ulp_num_msix_vec = num; +} + +int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) +{ + if (bp->edev) + return bp->edev->ulp_num_ctxs; + return 0; +} + +void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ulp_ctx) +{ + if (bp->edev) + bp->edev->ulp_num_ctxs = num_ulp_ctx; +} + +void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp) +{ + if (bp->edev) { + bp->edev->ulp_num_ctxs = BNXT_MIN_ROCE_STAT_CTXS; + /* Reserve one additional stat_ctx for PF0 (except + * on 1-port NICs) as it also creates one stat_ctx + * for PF1 in case of RoCE bonding. + */ + if (BNXT_PF(bp) && !bp->pf.port_id && + bp->port_count > 1) + bp->edev->ulp_num_ctxs++; + } +} + int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp_ops *ulp_ops, void *handle) @@ -56,11 +96,19 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt *bp = netdev_priv(dev); unsigned int max_stat_ctxs; struct bnxt_ulp *ulp; + int rc = 0; + rtnl_lock(); + if (!bp->irq_tbl) { + rc = -ENODEV; + goto exit; + } max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp); if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS || - bp->cp_nr_rings == max_stat_ctxs) - return -ENOMEM; + bp->cp_nr_rings == max_stat_ctxs) { + rc = -ENOMEM; + goto exit; + } ulp = edev->ulp_tbl; ulp->handle = handle; @@ -69,9 +117,13 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); + bnxt_fill_msix_vecs(bp, bp->edev->msix_entries); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; - return 0; +exit: + rtnl_unlock(); + return rc; } EXPORT_SYMBOL(bnxt_register_dev); @@ -83,8 +135,10 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) int i = 0; ulp = edev->ulp_tbl; + rtnl_lock(); if (ulp->msix_requested) edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; + edev->ulp_tbl->msix_requested = 0; if (ulp->max_async_event_id) bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, true); @@ -97,11 +151,12 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) msleep(100); i++; } + rtnl_unlock(); return; } EXPORT_SYMBOL(bnxt_unregister_dev); -int bnxt_get_ulp_msix_num(struct bnxt *bp) +static int bnxt_set_dflt_ulp_msix(struct bnxt *bp) { u32 roce_msix = BNXT_VF(bp) ? 
BNXT_MAX_VF_ROCE_MSIX : BNXT_MAX_ROCE_MSIX; @@ -110,18 +165,6 @@ int bnxt_get_ulp_msix_num(struct bnxt *bp) min_t(u32, roce_msix, num_online_cpus()) : 0); } -int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) -{ - if (bnxt_ulp_registered(bp->edev)) { - struct bnxt_en_dev *edev = bp->edev; - - if (edev->ulp_tbl->msix_requested) - return BNXT_MIN_ROCE_STAT_CTXS; - } - - return 0; -} - int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg) { @@ -336,7 +379,6 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->pf_port_id = bp->pf.port_id; edev->en_state = bp->state; edev->bar0 = bp->bar0; - edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); } void bnxt_rdma_aux_device_add(struct bnxt *bp) @@ -409,6 +451,7 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp) aux_priv->edev = edev; bp->edev = edev; bnxt_set_edev_info(edev, bp); + bp->ulp_num_msix_want = bnxt_set_dflt_ulp_msix(bp); return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index ae7266ddf167..04ce3328e66f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -85,17 +85,23 @@ struct bnxt_en_dev { * updated in resume. */ void __iomem *bar0; + + u16 ulp_num_msix_vec; + u16 ulp_num_ctxs; }; static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) { - if (edev && edev->ulp_tbl) + if (edev && rcu_access_pointer(edev->ulp_tbl->ulp_ops)) return true; return false; } int bnxt_get_ulp_msix_num(struct bnxt *bp); +void bnxt_set_ulp_msix_num(struct bnxt *bp, int num); int bnxt_get_ulp_stat_ctxs(struct bnxt *bp); +void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs); +void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp); void bnxt_ulp_stop(struct bnxt *bp); void bnxt_ulp_start(struct bnxt *bp, int err); void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs); -- cgit v1.2.3 From d630624ebd708fb659b18a8d1423e8e26de075ab Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Tue, 9 Apr 2024 14:54:30 -0700 Subject: bnxt_en: Utilize ulp client resources if RoCE is not registered If the RoCE driver is not registered for a RoCE capable device, add flexibility to use the RoCE resources (MSIX/NQs) for L2 purposes, such as additional rings configured by the user or for XDP. 
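A simplified sketch of the accounting shape added to __bnxt_reserve_rings() below, with illustrative parameter names: when no RoCE driver is registered, the reservation is the L2 ring count plus whatever ULP vectors are actually available, capped at the desired default, so the L2 side can consume the slack; once RoCE registers, its share is kept aside as before.

  #include <stdbool.h>

  /* Illustrative only - not the driver's function signatures. */
  static int nqs_to_reserve(bool roce_registered, int l2_nqs,
                            int ulp_in_use, int ulp_avail, int ulp_want)
  {
          if (roce_registered)
                  return l2_nqs + ulp_in_use;     /* keep RoCE's share intact */

          if (ulp_avail > ulp_want)
                  ulp_avail = ulp_want;
          return l2_nqs + ulp_avail;              /* slack usable for rings/XDP */
  }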
Reviewed-by: Andy Gospodarek Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 41 +++++++++++++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 14 +++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 2 ++ 3 files changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 88cf8f47e071..a2e21fe64ab9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7470,14 +7470,27 @@ static bool bnxt_rings_ok(struct bnxt *bp, struct bnxt_hw_rings *hwr) static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int cp = bp->cp_nr_rings; int rx_rings, rc; + int ulp_msix = 0; bool sh = false; int tx_cp; if (!bnxt_need_reserve_rings(bp)) return 0; - hwr.cp = bnxt_nq_rings_in_use(bp); + if (!bnxt_ulp_registered(bp->edev)) { + ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); + if (!ulp_msix) + bnxt_set_ulp_stat_ctxs(bp, 0); + + if (ulp_msix > bp->ulp_num_msix_want) + ulp_msix = bp->ulp_num_msix_want; + hwr.cp = cp + ulp_msix; + } else { + hwr.cp = bnxt_nq_rings_in_use(bp); + } + hwr.tx = bp->tx_nr_rings; hwr.rx = bp->rx_nr_rings; if (bp->flags & BNXT_FLAG_SHARED_RINGS) @@ -7550,12 +7563,11 @@ static int __bnxt_reserve_rings(struct bnxt *bp) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { - int resv_msix, resv_ctx, ulp_msix, ulp_ctxs; + int resv_msix, resv_ctx, ulp_ctxs; struct bnxt_hw_resc *hw_resc; hw_resc = &bp->hw_resc; resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings; - ulp_msix = bnxt_get_ulp_msix_num(bp); ulp_msix = min_t(int, resv_msix, ulp_msix); bnxt_set_ulp_msix_num(bp, ulp_msix); resv_ctx = hw_resc->resv_stat_ctxs - bp->cp_nr_rings; @@ -10609,13 +10621,23 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) { bool irq_cleared = false; int tcs = bp->num_tc; + int irqs_required; int rc; if (!bnxt_need_reserve_rings(bp)) return 0; - if (irq_re_init && BNXT_NEW_RM(bp) && - bnxt_get_num_msix(bp) != bp->total_irqs) { + if (!bnxt_ulp_registered(bp->edev)) { + int ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); + + if (ulp_msix > bp->ulp_num_msix_want) + ulp_msix = bp->ulp_num_msix_want; + irqs_required = ulp_msix + bp->cp_nr_rings; + } else { + irqs_required = bnxt_get_num_msix(bp); + } + + if (irq_re_init && BNXT_NEW_RM(bp) && irqs_required != bp->total_irqs) { bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); irq_cleared = true; @@ -13625,8 +13647,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, return -ENOMEM; hwr.stat = hwr.cp; if (BNXT_NEW_RM(bp)) { - hwr.cp += bnxt_get_ulp_msix_num(bp); - hwr.stat += bnxt_get_ulp_stat_ctxs(bp); + hwr.cp += bnxt_get_ulp_msix_num_in_use(bp); + hwr.stat += bnxt_get_ulp_stat_ctxs_in_use(bp); hwr.grp = rx; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); } @@ -14899,8 +14921,9 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, *max_rx = hw_resc->max_rx_rings; *max_cp = bnxt_get_max_func_cp_rings_for_en(bp); max_irq = min_t(int, bnxt_get_max_func_irqs(bp) - - bnxt_get_ulp_msix_num(bp), - hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp)); + bnxt_get_ulp_msix_num_in_use(bp), + hw_resc->max_stat_ctxs - + bnxt_get_ulp_stat_ctxs_in_use(bp)); 
if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) *max_cp = min_t(int, *max_cp, max_irq); max_ring_grps = hw_resc->max_hw_ring_grps; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index de2cb1d4cd98..edb10aebd095 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -61,6 +61,13 @@ void bnxt_set_ulp_msix_num(struct bnxt *bp, int num) bp->edev->ulp_num_msix_vec = num; } +int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp) +{ + if (bnxt_ulp_registered(bp->edev)) + return bp->edev->ulp_num_msix_vec; + return 0; +} + int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { if (bp->edev) @@ -74,6 +81,13 @@ void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ulp_ctx) bp->edev->ulp_num_ctxs = num_ulp_ctx; } +int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp) +{ + if (bnxt_ulp_registered(bp->edev)) + return bp->edev->ulp_num_ctxs; + return 0; +} + void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp) { if (bp->edev) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 04ce3328e66f..b86baf901a5d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -98,9 +98,11 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) } int bnxt_get_ulp_msix_num(struct bnxt *bp); +int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp); void bnxt_set_ulp_msix_num(struct bnxt *bp, int num); int bnxt_get_ulp_stat_ctxs(struct bnxt *bp); void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ctxs); +int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp); void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp); void bnxt_ulp_stop(struct bnxt *bp); void bnxt_ulp_start(struct bnxt *bp, int err); -- cgit v1.2.3 From 008ce0fd39036fb077d6b24ffd146ae5ebe3bd77 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 9 Apr 2024 14:54:31 -0700 Subject: bnxt_en: Update MODULE_DESCRIPTION Update MODULE_DESCRIPTION to the more generic adapter family name. The old name only includes the first generation of supported adapters. Signed-off-by: Michael Chan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240409215431.41424-8-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a2e21fe64ab9..d728df139c9c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -76,7 +76,7 @@ NETIF_MSG_TX_ERR) MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); +MODULE_DESCRIPTION("Broadcom NetXtreme network driver"); #define BNXT_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN) #define BNXT_RX_DMA_OFFSET NET_SKB_PAD -- cgit v1.2.3 From a68292eb431619a5f8db9d4868346837c5606424 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sat, 6 Apr 2024 16:23:37 +0200 Subject: net: mana: Avoid open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "req" variable is a pointer to "struct mana_cfg_rx_steer_req_v2" and this structure ends in a flexible array: struct mana_cfg_rx_steer_req_v2 { [...] 
mana_handle_t indir_tab[] __counted_by(num_indir_entries); }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + size * count" in the kzalloc() function. Moreover, use the "offsetof" helper to get the indirect table offset instead of the "sizeof" operator and avoid the open-coded arithmetic in pointers using the new flex member. This new structure member also allow us to remove the "req_indir_tab" variable since it is no longer needed. Now, it is also possible to use the "flex_array_size" helper to compute the size of these trailing elements in the "memcpy" function. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Signed-off-by: Erick Archer Link: https://lore.kernel.org/r/AS8PR02MB7237A21355C86EC0DCC0D83B8B022@AS8PR02MB7237.eurprd02.prod.outlook.com Reviewed-by: Justin Stitt Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/microsoft/mana/mana_en.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 59287c6e6cee..62bf3e5661a6 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1058,11 +1058,10 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; - mana_handle_t *req_indir_tab; u32 req_buf_size; int err; - req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req_buf_size = struct_size(req, indir_tab, num_entries); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1074,7 +1073,8 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->vport = apc->port_handle; req->num_indir_entries = num_entries; - req->indir_tab_offset = sizeof(*req); + req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, + indir_tab); req->rx_enable = rx; req->rss_enable = apc->rss_state; req->update_default_rxobj = update_default_rxobj; @@ -1086,11 +1086,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); - if (update_tab) { - req_indir_tab = (mana_handle_t *)(req + 1); - memcpy(req_indir_tab, apc->rxobj_table, - req->num_indir_entries * sizeof(mana_handle_t)); - } + if (update_tab) + memcpy(req->indir_tab, apc->rxobj_table, + flex_array_size(req, indir_tab, req->num_indir_entries)); err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); -- cgit v1.2.3 From f6d827b180bda01f8805bf5e85307419b0d6f890 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 10 Apr 2024 12:05:01 -0700 Subject: net: move skb ref helpers to new header Add a new header, linux/skbuff_ref.h, which contains all the skb_*_ref() helpers. Many of the consumers of skbuff.h do not actually use any of the skb ref helpers, and we can speed up compilation a bit by minimizing this header file. Additionally in the later patch in the series we add page_pool support to skb_frag_ref(), which requires some page_pool dependencies. 
We can now add these dependencies to skbuff_ref.h instead of a very ubiquitous skbuff.h Signed-off-by: Mina Almasry Link: https://lore.kernel.org/r/20240410190505.1225848-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 1 + drivers/net/ethernet/marvell/sky2.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 + drivers/net/ethernet/sun/cassini.c | 1 + drivers/net/veth.c | 1 + drivers/net/xen-netback/netback.c | 1 + include/linux/skbuff.h | 63 ------------------ include/linux/skbuff_ref.h | 75 ++++++++++++++++++++++ net/core/gro.c | 1 + net/core/skbuff.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv6/esp6.c | 1 + net/tls/tls_device.c | 1 + net/tls/tls_device_fallback.c | 1 + net/tls/tls_strp.c | 1 + 16 files changed, 89 insertions(+), 63 deletions(-) create mode 100644 include/linux/skbuff_ref.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 6482728794dd..e8e460a92e0e 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "chcr_ktls.h" static LIST_HEAD(uld_ctx_list); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 07720841a8d7..f3f7f4cc27b3 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -34,6 +34,7 @@ #include #include #include +#include #include diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index eac49657bd07..8328df8645d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index bfb903506367..8f1f43dbb76d 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/veth.c b/drivers/net/veth.c index bcdfbf61eb66..426e68a95067 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #define DRV_NAME "veth" diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index ef76850d9bcd..48254fc07d64 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -38,6 +38,7 @@ #include #include #include +#include #include diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7135a3e94afd..4072a7ee3859 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3492,73 +3492,10 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag) return netmem_to_page(frag->netmem); } -/** - * __skb_frag_ref - take an addition reference on a paged fragment. - * @frag: the paged fragment - * - * Takes an additional reference on the paged fragment @frag. - */ -static inline void __skb_frag_ref(skb_frag_t *frag) -{ - get_page(skb_frag_page(frag)); -} - -/** - * skb_frag_ref - take an addition reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset. - * - * Takes an additional reference on the @f'th paged fragment of @skb. 
- */ -static inline void skb_frag_ref(struct sk_buff *skb, int f) -{ - __skb_frag_ref(&skb_shinfo(skb)->frags[f]); -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); -bool napi_pp_put_page(struct page *page); - -static inline void -skb_page_unref(struct page *page, bool recycle) -{ -#ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page)) - return; -#endif - put_page(page); -} - -/** - * __skb_frag_unref - release a reference on a paged fragment. - * @frag: the paged fragment - * @recycle: recycle the page if allocated via page_pool - * - * Releases a reference on the paged fragment @frag - * or recycles the page via the page_pool API. - */ -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) -{ - skb_page_unref(skb_frag_page(frag), recycle); -} - -/** - * skb_frag_unref - release a reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset - * - * Releases a reference on the @f'th paged fragment of @skb. - */ -static inline void skb_frag_unref(struct sk_buff *skb, int f) -{ - struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (!skb_zcopy_managed(skb)) - __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); -} - /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h new file mode 100644 index 000000000000..11f0a4063403 --- /dev/null +++ b/include/linux/skbuff_ref.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Skb ref helpers. + * + */ + +#ifndef _LINUX_SKBUFF_REF_H +#define _LINUX_SKBUFF_REF_H + +#include + +/** + * __skb_frag_ref - take an addition reference on a paged fragment. + * @frag: the paged fragment + * + * Takes an additional reference on the paged fragment @frag. + */ +static inline void __skb_frag_ref(skb_frag_t *frag) +{ + get_page(skb_frag_page(frag)); +} + +/** + * skb_frag_ref - take an addition reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset. + * + * Takes an additional reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_ref(struct sk_buff *skb, int f) +{ + __skb_frag_ref(&skb_shinfo(skb)->frags[f]); +} + +bool napi_pp_put_page(struct page *page); + +static inline void +skb_page_unref(struct page *page, bool recycle) +{ +#ifdef CONFIG_PAGE_POOL + if (recycle && napi_pp_put_page(page)) + return; +#endif + put_page(page); +} + +/** + * __skb_frag_unref - release a reference on a paged fragment. + * @frag: the paged fragment + * @recycle: recycle the page if allocated via page_pool + * + * Releases a reference on the paged fragment @frag + * or recycles the page via the page_pool API. + */ +static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) +{ + skb_page_unref(skb_frag_page(frag), recycle); +} + +/** + * skb_frag_unref - release a reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset + * + * Releases a reference on the @f'th paged fragment of @skb. 
+ */ +static inline void skb_frag_unref(struct sk_buff *skb, int f) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (!skb_zcopy_managed(skb)) + __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); +} + +#endif /* _LINUX_SKBUFF_REF_H */ diff --git a/net/core/gro.c b/net/core/gro.c index 83f35d99a682..2459ab697f7f 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -3,6 +3,7 @@ #include #include #include +#include #define MAX_GRO_SKBS 8 diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ab970ded8a7b..2554a6f5f386 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -51,6 +51,7 @@ #endif #include #include +#include #include #include #include diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 40330253f076..dff04580318f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -20,6 +20,7 @@ #include #include #include +#include #include diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9282fafc0e61..61119d42b0fd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -44,6 +44,7 @@ #include #include #include +#include #include diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fb431d0a3475..6bc0a84c8d05 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bf8ed36b1ad6..ab6e694f7bc2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "tls.h" #include "trace.h" diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 4e7228f275fa..f9e3d3d90dcf 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "tls.h" diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb..58c4b06f4f0c 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2016 Tom Herbert */ #include +#include #include #include #include -- cgit v1.2.3 From a580ea994fd37f4105028f5a85c38ff6508a2b25 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 10 Apr 2024 12:05:02 -0700 Subject: net: mirror skb frag ref/unref helpers Refactor some of the skb frag ref/unref helpers for improved clarity. Implement napi_pp_get_page() to be the mirror counterpart of napi_pp_put_page(). Implement skb_page_ref() to be the mirror of skb_page_unref(). Improve __skb_frag_ref() to become a mirror counterpart of __skb_frag_unref(). Previously unref could handle pp & non-pp pages, while the ref could only handle non-pp pages. Now both the ref & unref helpers can correctly handle both pp & non-pp pages. Now that __skb_frag_ref() can handle both pp & non-pp pages, remove skb_pp_frag_ref(), and use __skb_frag_ref() instead. This lets us remove pp specific handling from skb_try_coalesce. Additionally, since __skb_frag_ref() can now handle both pp & non-pp pages, a latent issue in skb_shift() should now be fixed. Previously this function would do a non-pp ref & pp unref on potential pp frags (fragfrom). After this patch, skb_shift() should correctly do a pp ref/unref on pp frags. 
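For readers not familiar with skb->pp_recycle, the symmetry this patch establishes can be modelled outside the kernel. The sketch below is a minimal, self-contained userspace model and not the kernel API: demo_page, demo_pool_ref() and the recycle flag are stand-ins for struct page, page_pool_ref_page() and skb->pp_recycle, chosen only to show that after this change every ref path has a matching unref path for both pool-backed and ordinary pages.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct page: a refcount plus a flag saying
 * whether the page came from a page pool. */
struct demo_page {
    int refcount;
    bool from_pool;
};

/* Models napi_pp_get_page(): only succeeds for pool-backed pages. */
static bool demo_pool_ref(struct demo_page *p)
{
    if (!p->from_pool)
        return false;
    p->refcount++;          /* the kernel would call page_pool_ref_page() */
    return true;
}

/* Models skb_page_ref(): recycle corresponds to skb->pp_recycle. */
static void demo_page_ref(struct demo_page *p, bool recycle)
{
    if (recycle && demo_pool_ref(p))
        return;
    p->refcount++;          /* plain get_page() path */
}

/* Models skb_page_unref(): takes the same branch, so refs stay balanced. */
static void demo_page_unref(struct demo_page *p, bool recycle)
{
    if (recycle && p->from_pool) {
        p->refcount--;      /* the kernel would hand the page back to the pool */
        return;
    }
    p->refcount--;          /* plain put_page() path */
}

int main(void)
{
    struct demo_page pool_page = { .refcount = 1, .from_pool = true };
    struct demo_page norm_page = { .refcount = 1, .from_pool = false };

    demo_page_ref(&pool_page, true);
    demo_page_unref(&pool_page, true);
    demo_page_ref(&norm_page, false);
    demo_page_unref(&norm_page, false);

    printf("pool refcount=%d normal refcount=%d\n",
           pool_page.refcount, norm_page.refcount);
    return 0;
}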
Signed-off-by: Mina Almasry Reviewed-by: Dragos Tatulea Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240410190505.1225848-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 2 +- drivers/net/ethernet/sun/cassini.c | 4 +- drivers/net/veth.c | 2 +- include/linux/skbuff_ref.h | 39 ++++++++++++++++-- net/core/skbuff.c | 46 ++-------------------- net/tls/tls_device_fallback.c | 2 +- 6 files changed, 44 insertions(+), 51 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index e8e460a92e0e..3832c2e8ea5a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1659,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, for (i = 0; i < record->num_frags; i++) { skb_shinfo(nskb)->frags[i] = record->frags[i]; /* increase the frag ref count */ - __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); + __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle); } skb_shinfo(nskb)->nr_frags = record->num_frags; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 8f1f43dbb76d..f058e154a3bc 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2000,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen - swivel; skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); - __skb_frag_ref(frag); + __skb_frag_ref(frag, skb->pp_recycle); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2024,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, frag++; skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); - __skb_frag_ref(frag); + __skb_frag_ref(frag, skb->pp_recycle); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 426e68a95067..0b0293629329 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -717,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp) return; for (i = 0; i < sinfo->nr_frags; i++) - __skb_frag_ref(&sinfo->frags[i]); + __skb_frag_ref(&sinfo->frags[i], false); } static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 11f0a4063403..4dcdbe9fbc5f 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -8,16 +8,47 @@ #define _LINUX_SKBUFF_REF_H #include +#include + +#ifdef CONFIG_PAGE_POOL +static inline bool is_pp_page(struct page *page) +{ + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; +} + +static inline bool napi_pp_get_page(struct page *page) +{ + page = compound_head(page); + + if (!is_pp_page(page)) + return false; + + page_pool_ref_page(page); + return true; +} +#endif + +static inline void skb_page_ref(struct page *page, bool recycle) +{ +#ifdef CONFIG_PAGE_POOL + if (recycle && napi_pp_get_page(page)) + return; +#endif + get_page(page); +} /** * __skb_frag_ref - take an addition reference on a paged fragment. * @frag: the paged fragment + * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb. * - * Takes an additional reference on the paged fragment @frag. + * Takes an additional reference on the paged fragment @frag. 
Obtains the + * correct reference count depending on whether skb->pp_recycle is set and + * whether the frag is a page pool frag. */ -static inline void __skb_frag_ref(skb_frag_t *frag) +static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle) { - get_page(skb_frag_page(frag)); + skb_page_ref(skb_frag_page(frag), recycle); } /** @@ -29,7 +60,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag) */ static inline void skb_frag_ref(struct sk_buff *skb, int f) { - __skb_frag_ref(&skb_shinfo(skb)->frags[f]); + __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); } bool napi_pp_put_page(struct page *page); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2554a6f5f386..ea052fa710d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -907,11 +907,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_page(struct page *page) -{ - return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -1033,37 +1028,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) return napi_pp_put_page(virt_to_page(data)); } -/** - * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb - * @skb: page pool aware skb - * - * Increase the fragment reference count (pp_ref_count) of a skb. This is - * intended to gain fragment references only for page pool aware skbs, - * i.e. when skb->pp_recycle is true, and not for fragments in a - * non-pp-recycling skb. It has a fallback to increase references on normal - * pages, as page pool aware skbs may also have normal page fragments. - */ -static int skb_pp_frag_ref(struct sk_buff *skb) -{ - struct skb_shared_info *shinfo; - struct page *head_page; - int i; - - if (!skb->pp_recycle) - return -EINVAL; - - shinfo = skb_shinfo(skb); - - for (i = 0; i < shinfo->nr_frags; i++) { - head_page = compound_head(skb_frag_page(&shinfo->frags[i])); - if (likely(is_pp_page(head_page))) - page_pool_ref_page(head_page); - else - page_ref_inc(head_page); - } - return 0; -} - static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) @@ -4176,7 +4140,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - __skb_frag_ref(fragfrom); + __skb_frag_ref(fragfrom, skb->pp_recycle); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); @@ -4826,7 +4790,7 @@ normal: } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; - __skb_frag_ref(nskb_frag); + __skb_frag_ref(nskb_frag, nskb->pp_recycle); size = skb_frag_size(nskb_frag); if (pos < offset) { @@ -5957,10 +5921,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* if the skb is not cloned this does nothing * since we set nr_frags to 0. 
*/ - if (skb_pp_frag_ref(from)) { - for (i = 0; i < from_shinfo->nr_frags; i++) - __skb_frag_ref(&from_shinfo->frags[i]); - } + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle); to->truesize += delta; to->len += len; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index f9e3d3d90dcf..9237dded4467 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -278,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in, for (i = 0; remaining > 0; i++) { skb_frag_t *frag = &record->frags[i]; - __skb_frag_ref(frag); + __skb_frag_ref(frag, false); sg_set_page(sg_in + i, skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); -- cgit v1.2.3 From 919b38a916b4c616f1ead7d551de74bfcbe13e3c Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Apr 2024 00:41:54 +0300 Subject: net/mlx5e: Expose the VF/SF RX drop counter on the representor Q counters are device-level counters that track specific events, among which are out_of_buffer events. These events occur when packets are dropped due to a lack of receive buffer in the RX queue. Expose the total number of out_of_buffer events on the VFs/SFs to their respective representor, using the "ip stats group link" under the name of "rx_missed". The "rx_missed" equals the sum of all Q counters out_of_buffer values allocated on the VFs/SFs. Signed-off-by: Carolina Jubran Reviewed-by: Gal Pressman Reviewed-by: Aya Levin Reviewed-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240410214154.250583-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 42 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 ++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a74ee698671c..6acecf2e7cf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -273,6 +273,40 @@ out: kvfree(out); } +static int mlx5e_rep_query_aggr_q_counter(struct mlx5_core_dev *dev, int vport, void *out) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, other_vport, 1); + MLX5_SET(query_q_counter_in, in, vport_number, vport); + MLX5_SET(query_q_counter_in, in, aggregate, 1); + + return mlx5_cmd_exec_inout(dev, query_q_counter, in, out); +} + +static void mlx5e_rep_update_vport_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_stats *rep_stats = &priv->stats.rep_stats; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, q_counter_other_vport) || + !MLX5_CAP_GEN(priv->mdev, q_counter_aggregation)) + return; + + err = mlx5e_rep_query_aggr_q_counter(priv->mdev, rep->vport, out); + if (err) { + netdev_warn(priv->netdev, "failed reading stats on vport %d, error %d\n", + rep->vport, err); + return; + } + + rep_stats->rx_vport_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); +} + static void mlx5e_rep_get_strings(struct net_device *dev, u32 stringset, u8 *data) { @@ -1229,6 +1263,12 @@ static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) return 0; } +static void 
mlx5e_rep_stats_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_stats_update_ndo_stats(priv); + mlx5e_rep_update_vport_q_counter(priv); +} + static int mlx5e_rep_event_mpesw(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1421,7 +1461,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .enable = mlx5e_rep_enable, .disable = mlx5e_rep_disable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_stats_update_ndo_stats, + .update_stats = mlx5e_rep_stats_update_ndo_stats, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = 1, .stats_grps = mlx5e_rep_stats_grps, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 9cee4c9472e9..650732288616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -484,6 +484,7 @@ struct mlx5e_rep_stats { u64 tx_vport_rdma_multicast_bytes; u64 vport_loopback_packets; u64 vport_loopback_bytes; + u64 rx_vport_out_of_buffer; }; struct mlx5e_stats { @@ -504,6 +505,7 @@ static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport vf_vport->tx_packets = rep_stats->vport_tx_packets; vf_vport->rx_bytes = rep_stats->vport_rx_bytes; vf_vport->tx_bytes = rep_stats->vport_tx_bytes; + vf_vport->rx_missed_errors = rep_stats->rx_vport_out_of_buffer; } extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; -- cgit v1.2.3 From 8910f93b95706b4b11485bb0f318f88918a46b04 Mon Sep 17 00:00:00 2001 From: Fei Qin Date: Wed, 10 Apr 2024 13:26:36 +0200 Subject: nfp: update devlink device info output Newer NIC will introduce a new part number, now add it into devlink device info. This patch also updates the information of "board.id" in nfp.rst to match the devlink-info.rst. Signed-off-by: Fei Qin Signed-off-by: Louis Peens Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- Documentation/networking/devlink/nfp.rst | 5 ++++- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/devlink/nfp.rst b/Documentation/networking/devlink/nfp.rst index a1717db0dfcc..3093642bdae4 100644 --- a/Documentation/networking/devlink/nfp.rst +++ b/Documentation/networking/devlink/nfp.rst @@ -32,7 +32,7 @@ The ``nfp`` driver reports the following versions - Description * - ``board.id`` - fixed - - Part number identifying the board design + - Identifier of the board design * - ``board.rev`` - fixed - Revision of the board design @@ -42,6 +42,9 @@ The ``nfp`` driver reports the following versions * - ``board.model`` - fixed - Model name of the board design + * - ``board.part_number`` + - fixed + - Part number of the board and its components * - ``fw.bundle_id`` - stored, running - Firmware bundle id diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 635d33c0d6d3..ea75b9a06313 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -160,6 +160,7 @@ static const struct nfp_devlink_versions_simple { { DEVLINK_INFO_VERSION_GENERIC_BOARD_REV, "assembly.revision", }, { DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", }, { "board.model", /* code name */ "assembly.model", }, + { DEVLINK_INFO_VERSION_GENERIC_BOARD_PART_NUMBER, "pn", }, }; static int -- cgit v1.2.3 From 0a66e976430402d0064687853fddc33088faa98a Mon Sep 17 00:00:00 2001 From: Lukasz Plachno Date: Wed, 3 Apr 2024 12:24:01 +0200 Subject: ice: Remove unnecessary argument from ice_fdir_comp_rules() Passing v6 argument is unnecessary as flow_type is still analyzed inside the function. Reviewed-by: Przemek Kitszel Signed-off-by: Lukasz Plachno Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fdir.c | 85 ++++++++++++++----------------- 1 file changed, 39 insertions(+), 46 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 5840c3e04a5b..1f7b26f38818 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -1189,52 +1189,54 @@ static int ice_cmp_ipv6_addr(__be32 *a, __be32 *b) * ice_fdir_comp_rules - compare 2 filters * @a: a Flow Director filter data structure * @b: a Flow Director filter data structure - * @v6: bool true if v6 filter * * Returns true if the filters match */ static bool -ice_fdir_comp_rules(struct ice_fdir_fltr *a, struct ice_fdir_fltr *b, bool v6) +ice_fdir_comp_rules(struct ice_fdir_fltr *a, struct ice_fdir_fltr *b) { enum ice_fltr_ptype flow_type = a->flow_type; /* The calling function already checks that the two filters have the * same flow_type. 
*/ - if (!v6) { - if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) { - if (a->ip.v4.dst_ip == b->ip.v4.dst_ip && - a->ip.v4.src_ip == b->ip.v4.src_ip && - a->ip.v4.dst_port == b->ip.v4.dst_port && - a->ip.v4.src_port == b->ip.v4.src_port) - return true; - } else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) { - if (a->ip.v4.dst_ip == b->ip.v4.dst_ip && - a->ip.v4.src_ip == b->ip.v4.src_ip && - a->ip.v4.l4_header == b->ip.v4.l4_header && - a->ip.v4.proto == b->ip.v4.proto && - a->ip.v4.ip_ver == b->ip.v4.ip_ver && - a->ip.v4.tos == b->ip.v4.tos) - return true; - } - } else { - if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) { - if (a->ip.v6.dst_port == b->ip.v6.dst_port && - a->ip.v6.src_port == b->ip.v6.src_port && - !ice_cmp_ipv6_addr(a->ip.v6.dst_ip, - b->ip.v6.dst_ip) && - !ice_cmp_ipv6_addr(a->ip.v6.src_ip, - b->ip.v6.src_ip)) - return true; - } else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) { - if (a->ip.v6.dst_port == b->ip.v6.dst_port && - a->ip.v6.src_port == b->ip.v6.src_port) - return true; - } + switch (flow_type) { + case ICE_FLTR_PTYPE_NONF_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_UDP: + case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: + if (a->ip.v4.dst_ip == b->ip.v4.dst_ip && + a->ip.v4.src_ip == b->ip.v4.src_ip && + a->ip.v4.dst_port == b->ip.v4.dst_port && + a->ip.v4.src_port == b->ip.v4.src_port) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: + if (a->ip.v4.dst_ip == b->ip.v4.dst_ip && + a->ip.v4.src_ip == b->ip.v4.src_ip && + a->ip.v4.l4_header == b->ip.v4.l4_header && + a->ip.v4.proto == b->ip.v4.proto && + a->ip.v4.ip_ver == b->ip.v4.ip_ver && + a->ip.v4.tos == b->ip.v4.tos) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV6_UDP: + case ICE_FLTR_PTYPE_NONF_IPV6_TCP: + case ICE_FLTR_PTYPE_NONF_IPV6_SCTP: + if (a->ip.v6.dst_port == b->ip.v6.dst_port && + a->ip.v6.src_port == b->ip.v6.src_port && + !ice_cmp_ipv6_addr(a->ip.v6.dst_ip, + b->ip.v6.dst_ip) && + !ice_cmp_ipv6_addr(a->ip.v6.src_ip, + b->ip.v6.src_ip)) + return true; + break; + case ICE_FLTR_PTYPE_NONF_IPV6_OTHER: + if (a->ip.v6.dst_port == b->ip.v6.dst_port && + a->ip.v6.src_port == b->ip.v6.src_port) + return true; + break; + default: + break; } return false; @@ -1253,19 +1255,10 @@ bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input) bool ret = false; list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) { - enum ice_fltr_ptype flow_type; - if (rule->flow_type != input->flow_type) continue; - flow_type = input->flow_type; - if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP || - flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) - ret = ice_fdir_comp_rules(rule, input, false); - else - ret = ice_fdir_comp_rules(rule, input, true); + ret = ice_fdir_comp_rules(rule, input); if (ret) { if (rule->fltr_id == input->fltr_id && rule->q_index != input->q_index) -- cgit v1.2.3 From ae67389c53928418975b46d13b51fde4e40ee15f Mon Sep 17 00:00:00 2001 From: Jakub Buchocki Date: Wed, 3 Apr 2024 12:24:02 +0200 Subject: ice: Implement 'flow-type ether' rules Add support for 'flow-type ether' Flow Director rules via ethtool. Create packet segment info for filter configuration based on ethtool command parameters. 
Reuse infrastructure already created for ipv4 and ipv6 flows to convert packet segment into extraction sequence, which is later used to program the filter inside Flow Director block of the Rx pipeline. Rules not containing masks are processed by the Flow Director, and support the following set of input parameters in all combinations: src, dst, proto, vlan-etype, vlan, action. It is possible to specify address mask in ethtool parameters but only 00:00:00:00:00 and FF:FF:FF:FF:FF are valid. The same applies to proto, vlan-etype and vlan masks: only 0x0000 and 0xffff masks are valid. Testing: (DUT) iperf3 -s (DUT) ethtool -U ens785f0np0 flow-type ether dst \ action 10 (DUT) watch 'ethtool -S ens785f0np0 | grep rx_queue' (LP) iperf3 -c ${DUT_IP} Counters increase only for: 'rx_queue_10_packets' 'rx_queue_10_bytes' Signed-off-by: Jakub Buchocki Co-developed-by: Mateusz Pacuszka Signed-off-by: Mateusz Pacuszka Reviewed-by: Przemek Kitszel Signed-off-by: Lukasz Plachno Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 140 +++++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_fdir.c | 26 ++++ drivers/net/ethernet/intel/ice/ice_fdir.h | 5 + drivers/net/ethernet/intel/ice/ice_type.h | 1 + 4 files changed, 171 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 9a1a04f5f146..e3cab8e98f52 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -41,6 +41,8 @@ static struct in6_addr zero_ipv6_addr_mask = { static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow) { switch (flow) { + case ICE_FLTR_PTYPE_NONF_ETH: + return ETHER_FLOW; case ICE_FLTR_PTYPE_NONF_IPV4_TCP: return TCP_V4_FLOW; case ICE_FLTR_PTYPE_NONF_IPV4_UDP: @@ -72,6 +74,8 @@ static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow) static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth) { switch (eth) { + case ETHER_FLOW: + return ICE_FLTR_PTYPE_NONF_ETH; case TCP_V4_FLOW: return ICE_FLTR_PTYPE_NONF_IPV4_TCP; case UDP_V4_FLOW: @@ -137,6 +141,10 @@ int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd) memset(&fsp->m_ext, 0, sizeof(fsp->m_ext)); switch (fsp->flow_type) { + case ETHER_FLOW: + fsp->h_u.ether_spec = rule->eth; + fsp->m_u.ether_spec = rule->eth_mask; + break; case IPV4_USER_FLOW: fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; fsp->h_u.usr_ip4_spec.proto = 0; @@ -1193,6 +1201,122 @@ ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg, return 0; } +/** + * ice_fdir_vlan_valid - validate VLAN data for Flow Director rule + * @dev: network interface device structure + * @fsp: pointer to ethtool Rx flow specification + * + * Return: true if vlan data is valid, false otherwise + */ +static bool ice_fdir_vlan_valid(struct device *dev, + struct ethtool_rx_flow_spec *fsp) +{ + if (fsp->m_ext.vlan_etype && !eth_type_vlan(fsp->h_ext.vlan_etype)) + return false; + + if (fsp->m_ext.vlan_tci && ntohs(fsp->h_ext.vlan_tci) >= VLAN_N_VID) + return false; + + /* proto and vlan must have vlan-etype defined */ + if (fsp->m_u.ether_spec.h_proto && fsp->m_ext.vlan_tci && + !fsp->m_ext.vlan_etype) { + dev_warn(dev, "Filter with proto and vlan require also vlan-etype"); + return false; + } + + return true; +} + +/** + * ice_set_ether_flow_seg - set address and 
protocol segments for ether flow + * @dev: network interface device structure + * @seg: flow segment for programming + * @eth_spec: mask data from ethtool + * + * Return: 0 on success and errno in case of error. + */ +static int ice_set_ether_flow_seg(struct device *dev, + struct ice_flow_seg_info *seg, + struct ethhdr *eth_spec) +{ + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH); + + /* empty rules are not valid */ + if (is_zero_ether_addr(eth_spec->h_source) && + is_zero_ether_addr(eth_spec->h_dest) && + !eth_spec->h_proto) + return -EINVAL; + + /* Ethertype */ + if (eth_spec->h_proto == htons(0xFFFF)) { + ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_ETH_TYPE, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + } else if (eth_spec->h_proto) { + dev_warn(dev, "Only 0x0000 or 0xffff proto mask is allowed for flow-type ether"); + return -EOPNOTSUPP; + } + + /* Source MAC address */ + if (is_broadcast_ether_addr(eth_spec->h_source)) + ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_ETH_SA, + ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + else if (!is_zero_ether_addr(eth_spec->h_source)) + goto err_mask; + + /* Destination MAC address */ + if (is_broadcast_ether_addr(eth_spec->h_dest)) + ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_ETH_DA, + ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + else if (!is_zero_ether_addr(eth_spec->h_dest)) + goto err_mask; + + return 0; + +err_mask: + dev_warn(dev, "Only 00:00:00:00:00:00 or ff:ff:ff:ff:ff:ff MAC address mask is allowed for flow-type ether"); + return -EOPNOTSUPP; +} + +/** + * ice_set_fdir_vlan_seg - set vlan segments for ether flow + * @seg: flow segment for programming + * @ext_masks: masks for additional RX flow fields + * + * Return: 0 on success and errno in case of error. 
+ */ +static int +ice_set_fdir_vlan_seg(struct ice_flow_seg_info *seg, + struct ethtool_flow_ext *ext_masks) +{ + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_VLAN); + + if (ext_masks->vlan_etype) { + if (ext_masks->vlan_etype != htons(0xFFFF)) + return -EOPNOTSUPP; + + ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_S_VLAN, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + } + + if (ext_masks->vlan_tci) { + if (ext_masks->vlan_tci != htons(0xFFFF)) + return -EOPNOTSUPP; + + ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_C_VLAN, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + } + + return 0; +} + /** * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter * @pf: PF structure @@ -1209,7 +1333,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, struct device *dev = ice_pf_to_dev(pf); enum ice_fltr_ptype fltr_idx; struct ice_hw *hw = &pf->hw; - bool perfect_filter; + bool perfect_filter = false; int ret; seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL); @@ -1262,6 +1386,16 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, ret = ice_set_fdir_ip6_usr_seg(seg, &fsp->m_u.usr_ip6_spec, &perfect_filter); break; + case ETHER_FLOW: + ret = ice_set_ether_flow_seg(dev, seg, &fsp->m_u.ether_spec); + if (!ret && (fsp->m_ext.vlan_etype || fsp->m_ext.vlan_tci)) { + if (!ice_fdir_vlan_valid(dev, fsp)) { + ret = -EINVAL; + break; + } + ret = ice_set_fdir_vlan_seg(seg, &fsp->m_ext); + } + break; default: ret = -EINVAL; } @@ -1823,6 +1957,10 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, input->mask.v6.tc = fsp->m_u.usr_ip6_spec.tclass; input->mask.v6.proto = fsp->m_u.usr_ip6_spec.l4_proto; break; + case ETHER_FLOW: + input->eth = fsp->h_u.ether_spec; + input->eth_mask = fsp->m_u.ether_spec; + break; default: /* not doing un-parsed flow types */ return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 1f7b26f38818..26b357c0ae15 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -4,6 +4,8 @@ #include "ice_common.h" /* These are training packet headers used to program flow director filters. */ +static const u8 ice_fdir_eth_pkt[22]; + static const u8 ice_fdir_tcpv4_pkt[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, @@ -416,6 +418,11 @@ static const u8 ice_fdir_ip6_tun_pkt[] = { /* Flow Director no-op training packet table */ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = { + { + ICE_FLTR_PTYPE_NONF_ETH, + sizeof(ice_fdir_eth_pkt), ice_fdir_eth_pkt, + sizeof(ice_fdir_eth_pkt), ice_fdir_eth_pkt, + }, { ICE_FLTR_PTYPE_NONF_IPV4_TCP, sizeof(ice_fdir_tcpv4_pkt), ice_fdir_tcpv4_pkt, @@ -914,6 +921,21 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, * perspective. The input from user is from Rx filter perspective. 
*/ switch (flow) { + case ICE_FLTR_PTYPE_NONF_ETH: + ice_pkt_insert_mac_addr(loc, input->eth.h_dest); + ice_pkt_insert_mac_addr(loc + ETH_ALEN, input->eth.h_source); + if (input->ext_data.vlan_tag || input->ext_data.vlan_type) { + ice_pkt_insert_u16(loc, ICE_ETH_TYPE_F_OFFSET, + input->ext_data.vlan_type); + ice_pkt_insert_u16(loc, ICE_ETH_VLAN_TCI_OFFSET, + input->ext_data.vlan_tag); + ice_pkt_insert_u16(loc, ICE_ETH_TYPE_VLAN_OFFSET, + input->eth.h_proto); + } else { + ice_pkt_insert_u16(loc, ICE_ETH_TYPE_F_OFFSET, + input->eth.h_proto); + } + break; case ICE_FLTR_PTYPE_NONF_IPV4_TCP: ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, input->ip.v4.src_ip); @@ -1201,6 +1223,10 @@ ice_fdir_comp_rules(struct ice_fdir_fltr *a, struct ice_fdir_fltr *b) * same flow_type. */ switch (flow_type) { + case ICE_FLTR_PTYPE_NONF_ETH: + if (!memcmp(&a->eth, &b->eth, sizeof(a->eth))) + return true; + break; case ICE_FLTR_PTYPE_NONF_IPV4_TCP: case ICE_FLTR_PTYPE_NONF_IPV4_UDP: case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index 1b9b84490689..021ecbac7848 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -8,6 +8,9 @@ #define ICE_FDIR_MAX_RAW_PKT_SIZE (512 + ICE_FDIR_TUN_PKT_OFF) /* macros for offsets into packets for flow director programming */ +#define ICE_ETH_TYPE_F_OFFSET 12 +#define ICE_ETH_VLAN_TCI_OFFSET 14 +#define ICE_ETH_TYPE_VLAN_OFFSET 16 #define ICE_IPV4_SRC_ADDR_OFFSET 26 #define ICE_IPV4_DST_ADDR_OFFSET 30 #define ICE_IPV4_TCP_SRC_PORT_OFFSET 34 @@ -159,6 +162,8 @@ struct ice_fdir_fltr { struct list_head fltr_node; enum ice_fltr_ptype flow_type; + struct ethhdr eth, eth_mask; + union { struct ice_fdir_v4 v4; struct ice_fdir_v6 v6; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 08ec5efdafe6..4049ae40c4fb 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -203,6 +203,7 @@ struct ice_phy_info { enum ice_fltr_ptype { /* NONE - used for undef/error */ ICE_FLTR_PTYPE_NONF_NONE = 0, + ICE_FLTR_PTYPE_NONF_ETH, ICE_FLTR_PTYPE_NONF_IPV4_UDP, ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_SCTP, -- cgit v1.2.3 From c22f7dacb8202779e60f45976443d979a749ab13 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 22 Mar 2024 14:44:44 -0700 Subject: ice: set vf->num_msix in ice_initialize_vf_entry() Commit fe1c5ca2fe76 ("ice: implement num_msix field per VF") updated the driver to allow for per-VF MSI-X configuration. The initial defaults were set in ice_create_vf_entries(). This logic is better placed in ice_initialize_vf_entry(). Indeed, that function already sets vf->num_vf_qs, as well as initializes the allow list via calling ice_vc_set_default_allowlist(). Move this logic into ice_initialize_vf_entry(). This makes the code clear, and ensures that these VF fields will be initialized properly for both SR-IOV VFs and the upcoming Scalable IOV VFs. 
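The design point, putting every per-VF default in one initializer so that any creation path inherits the same values, can be sketched generically. The snippet below is an illustrative userspace model, not ice driver code; demo_vf, demo_defaults and the two create helpers are hypothetical names standing in for ice_initialize_vf_entry() and its callers.

#include <stdio.h>

struct demo_defaults {
    int num_msix;
    int num_qps;
};

struct demo_vf {
    int id;
    int num_msix;
    int num_qs;
    int spoofchk;
};

/* One place owns every default; models ice_initialize_vf_entry(). */
static void demo_initialize_vf_entry(struct demo_vf *vf,
                                     const struct demo_defaults *d)
{
    vf->spoofchk = 1;
    vf->num_msix = d->num_msix;
    vf->num_qs   = d->num_qps;
}

/* Both creation flavors call the same initializer, so neither can forget
 * a field that the other one sets. */
static void demo_create_sriov_vf(struct demo_vf *vf, int id,
                                 const struct demo_defaults *d)
{
    vf->id = id;
    demo_initialize_vf_entry(vf, d);
}

static void demo_create_siov_vf(struct demo_vf *vf, int id,
                                const struct demo_defaults *d)
{
    vf->id = id;
    demo_initialize_vf_entry(vf, d);
}

int main(void)
{
    struct demo_defaults d = { .num_msix = 17, .num_qps = 16 };
    struct demo_vf a, b;

    demo_create_sriov_vf(&a, 0, &d);
    demo_create_siov_vf(&b, 1, &d);
    printf("vf0 msix=%d vf1 msix=%d\n", a.num_msix, b.num_msix);
    return 0;
}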
Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 5 ----- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 65e1986af777..5e9521876617 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -831,11 +831,6 @@ static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) pci_dev_get(vfdev); - /* set default number of MSI-X */ - vf->num_msix = pf->vfs.num_msix_per; - vf->num_vf_qs = pf->vfs.num_qps_per; - ice_vc_set_default_allowlist(vf); - hash_add_rcu(vfs->table, &vf->entry, vf_id); } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 21d26e19338a..c51e2482cad2 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -992,10 +992,13 @@ void ice_initialize_vf_entry(struct ice_vf *vf) /* assign default capabilities */ vf->spoofchk = true; - vf->num_vf_qs = vfs->num_qps_per; ice_vc_set_default_allowlist(vf); ice_virtchnl_set_dflt_ops(vf); + /* set default number of MSI-X */ + vf->num_msix = vfs->num_msix_per; + vf->num_vf_qs = vfs->num_qps_per; + /* ctrl_vsi_idx will be set to a valid value only when iAVF * creates its first fdir rule. */ -- cgit v1.2.3 From b80d01ef9aba6c0824644bbde49b1f4e2955e5d1 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 22 Mar 2024 14:44:45 -0700 Subject: ice: store VF relative MSI-X index in q_vector->vf_reg_idx The ice physical function driver needs to configure the association of queues and interrupts on behalf of its virtual functions. This is done over virtchnl by the VF sending messages during its initialization phase. These messages contain a vector_id which the VF wants to associate with a given queue. This ID is relative to the VF space, where 0 indicates the control IRQ for non-queue interrupts. When programming the mapping, the PF driver currently passes this vector_id directly to the low level functions for programming. This works for SR-IOV, because the hardware uses the VF-based indexing for interrupts. This won't work for Scalable IOV, which uses PF-based indexing for programming its VSIs. To handle this, the driver needs to be able to look up the proper index to use for programming. For typical IRQs, this would be the q_vector->reg_idx field. The q_vector->reg_idx can't be set to a VF relative value, because it is used when the PF needs to control the interrupt, such as when triggering a software interrupt on stopping the Tx queue. Thus, introduce a new q_vector->vf_reg_idx which can store the VF relative index for registers which expect this. Use this in ice_cfg_interrupt to look up the VF index from the q_vector. This allows removing the vector ID parameter of ice_cfg_interrupt. Also notice that this function returns an int, but then is cast to the virtchnl error enumeration, virtchnl_status_code. Update the return type to indicate it does not return an integer error code. We can't use normal error codes here because the return values are passed across the virtchnl interface. This will allow the future Scalable IOV VFs to correctly look up the index needed for programming the VF queues without breaking SR-IOV. 
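The relationship between the two indices can be shown with concrete numbers. This is a small, self-contained sketch rather than driver code; the base value 64 for the VF's first PF-relative vector and the four queue vectors are made up, but the arithmetic (one reserved OICR vector, then vf_reg_idx = v_idx + 1 and reg_idx = first_vector_idx + vf_reg_idx) follows the scheme described above.

#include <stdio.h>

#define DEMO_NONQ_VECS_VF 1   /* vector 0 is the VF's OICR/mailbox vector */

int main(void)
{
    int first_vector_idx = 64;  /* hypothetical PF-relative base for this VF */
    int num_q_vectors = 4;

    for (int v_idx = 0; v_idx < num_q_vectors; v_idx++) {
        /* VF-relative index: what the VF references over virtchnl */
        int vf_reg_idx = v_idx + DEMO_NONQ_VECS_VF;
        /* PF-relative index: what the PF uses to program registers */
        int reg_idx = first_vector_idx + vf_reg_idx;

        printf("q_vector %d: vf_reg_idx=%d reg_idx=%d\n",
               v_idx, vf_reg_idx, reg_idx);
    }
    return 0;
}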
Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 3 ++- drivers/net/ethernet/intel/ice/ice_base.c | 3 ++- drivers/net/ethernet/intel/ice/ice_sriov.c | 7 ++++--- drivers/net/ethernet/intel/ice/ice_sriov.h | 5 ++--- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 14 +++++++------- 5 files changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a7e88d797d4c..67a3236ab1fc 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -459,7 +459,7 @@ struct ice_q_vector { struct ice_vsi *vsi; u16 v_idx; /* index in the vsi->q_vector array. */ - u16 reg_idx; + u16 reg_idx; /* PF relative register index */ u8 num_ring_rx; /* total number of Rx rings in vector */ u8 num_ring_tx; /* total number of Tx rings in vector */ u8 wb_on_itr:1; /* if true, WB on ITR is enabled */ @@ -481,6 +481,7 @@ struct ice_q_vector { char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ + u16 vf_reg_idx; /* VF relative register index */ struct msi_map irq; } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index af0384b37119..687f6cb2b917 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -121,7 +121,7 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->irq.index = -ENOENT; if (vsi->type == ICE_VSI_VF) { - q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector); + ice_calc_vf_reg_idx(vsi->vf, q_vector); goto out; } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); @@ -145,6 +145,7 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) skip_alloc: q_vector->reg_idx = q_vector->irq.index; + q_vector->vf_reg_idx = q_vector->irq.index; /* only set affinity_mask if the CPU is online */ if (cpu_online(v_idx)) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 5e9521876617..fb2e96db647e 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -360,13 +360,14 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) * @vf: VF to calculate the register index for * @q_vector: a q_vector associated to the VF */ -int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) +void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) { if (!vf || !q_vector) - return -EINVAL; + return; /* always add one to account for the OICR being the first MSIX */ - return vf->first_vector_idx + q_vector->v_idx + 1; + q_vector->vf_reg_idx = q_vector->v_idx + ICE_NONQ_VECS_VF; + q_vector->reg_idx = vf->first_vector_idx + q_vector->vf_reg_idx; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 8488df38b586..4ba8fb53aea1 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -49,7 +49,7 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); -int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); +void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); int 
ice_get_vf_stats(struct net_device *netdev, int vf_id, @@ -130,11 +130,10 @@ ice_set_vf_bw(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } -static inline int +static inline void ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, struct ice_q_vector __always_unused *q_vector) { - return 0; } static inline int diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 1ff9818b4c84..1c6ce0c4ed4e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1505,13 +1505,12 @@ error_param: * ice_cfg_interrupt * @vf: pointer to the VF info * @vsi: the VSI being configured - * @vector_id: vector ID * @map: vector map for mapping vectors to queues * @q_vector: structure for interrupt vector * configure the IRQ to queue map */ -static int -ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, +static enum virtchnl_status_code +ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, struct virtchnl_vector_map *map, struct ice_q_vector *q_vector) { @@ -1531,7 +1530,8 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, q_vector->num_ring_rx++; q_vector->rx.itr_idx = map->rxitr_idx; vsi->rx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id, + ice_cfg_rxq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, q_vector->rx.itr_idx); } @@ -1545,7 +1545,8 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, q_vector->num_ring_tx++; q_vector->tx.itr_idx = map->txitr_idx; vsi->tx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id, + ice_cfg_txq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, q_vector->tx.itr_idx); } @@ -1619,8 +1620,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) } /* lookout for the invalid queue index */ - v_ret = (enum virtchnl_status_code) - ice_cfg_interrupt(vf, vsi, vector_id, map, q_vector); + v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector); if (v_ret) goto error_param; } -- cgit v1.2.3 From 2a1a1a7b5fd778ccf22ee530fd4dec7f7d597056 Mon Sep 17 00:00:00 2001 From: Hao Lan Date: Wed, 10 Apr 2024 20:53:51 +0800 Subject: net: hns3: add command queue trace for hns3 Add support to dump command queue trace for hns3. 
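The mechanism being added, an optional pair of callbacks that the shared command-send path invokes around each command, is a common pattern and can be sketched independently of the hns3 code. The names below (demo_cmq_ops, demo_cmd_send and so on) are illustrative only and stand in for hclge_comm_cmq_ops, hclge_comm_cmd_send and hclge_comm_cmd_init_ops.

#include <stdio.h>

struct demo_desc { unsigned short opcode; };

struct demo_cmq_ops {
    void (*trace_cmd_send)(const struct demo_desc *desc, int num);
    void (*trace_cmd_get)(const struct demo_desc *desc, int num);
};

static struct demo_cmq_ops demo_ops;    /* zeroed by default: tracing off */

/* Models hclge_comm_cmd_init_ops(): callers opt in to tracing. */
static void demo_cmd_init_ops(const struct demo_cmq_ops *ops)
{
    if (ops)
        demo_ops = *ops;
}

/* The shared send path only calls the hooks when they were registered. */
static int demo_cmd_send(struct demo_desc *desc, int num)
{
    if (demo_ops.trace_cmd_send)
        demo_ops.trace_cmd_send(desc, num);

    /* ... hardware submission and completion would happen here ... */

    if (demo_ops.trace_cmd_get)
        demo_ops.trace_cmd_get(desc, num);
    return 0;
}

static void demo_trace_send(const struct demo_desc *desc, int num)
{
    printf("send opcode 0x%04x (%d desc)\n",
           (unsigned int)desc->opcode, num);
}

int main(void)
{
    struct demo_cmq_ops ops = { .trace_cmd_send = demo_trace_send };
    struct demo_desc d = { .opcode = 0x0001 };

    demo_cmd_send(&d, 1);   /* silent: no ops registered yet */
    demo_cmd_init_ops(&ops);
    demo_cmd_send(&d, 1);   /* now traced */
    return 0;
}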
Signed-off-by: Hao Lan Signed-off-by: Jijie Shao Link: https://lore.kernel.org/r/20240410125354.2177067-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3_common/hclge_comm_cmd.c | 18 +++++ .../hisilicon/hns3/hns3_common/hclge_comm_cmd.h | 14 +++- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 45 +++++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_trace.h | 94 ++++++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 40 +++++++++ .../ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h | 50 ++++++++++++ 6 files changed, 260 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c index 652d71326231..11df6fbd641d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c @@ -470,10 +470,14 @@ static int hclge_comm_cmd_check_result(struct hclge_comm_hw *hw, int hclge_comm_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, int num) { + bool is_special = hclge_comm_is_special_opcode(le16_to_cpu(desc->opcode)); struct hclge_comm_cmq_ring *csq = &hw->cmq.csq; int ret; int ntc; + if (hw->cmq.ops.trace_cmd_send) + hw->cmq.ops.trace_cmd_send(hw, desc, num, is_special); + spin_lock_bh(&hw->cmq.csq.lock); if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hw->comm_state)) { @@ -507,6 +511,9 @@ int hclge_comm_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, spin_unlock_bh(&hw->cmq.csq.lock); + if (hw->cmq.ops.trace_cmd_get) + hw->cmq.ops.trace_cmd_get(hw, desc, num, is_special); + return ret; } @@ -584,6 +591,17 @@ err_csq: return ret; } +void hclge_comm_cmd_init_ops(struct hclge_comm_hw *hw, + const struct hclge_comm_cmq_ops *ops) +{ + struct hclge_comm_cmq *cmdq = &hw->cmq; + + if (ops) { + cmdq->ops.trace_cmd_send = ops->trace_cmd_send; + cmdq->ops.trace_cmd_get = ops->trace_cmd_get; + } +} + int hclge_comm_cmd_init(struct hnae3_ae_dev *ae_dev, struct hclge_comm_hw *hw, u32 *fw_version, bool is_pf, unsigned long reset_pending) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h index 552396518e08..e6a087576df6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -423,11 +423,22 @@ enum hclge_comm_cmd_status { HCLGE_COMM_ERR_CSQ_ERROR = -3, }; +struct hclge_comm_hw; +struct hclge_comm_cmq_ops { + void (*trace_cmd_send)(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int num, bool is_special); + void (*trace_cmd_get)(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int num, bool is_special); +}; + struct hclge_comm_cmq { struct hclge_comm_cmq_ring csq; struct hclge_comm_cmq_ring crq; u16 tx_timeout; enum hclge_comm_cmd_status last_status; + struct hclge_comm_cmq_ops ops; }; struct hclge_comm_hw { @@ -474,5 +485,6 @@ int hclge_comm_cmd_queue_init(struct pci_dev *pdev, struct hclge_comm_hw *hw); int hclge_comm_cmd_init(struct hnae3_ae_dev *ae_dev, struct hclge_comm_hw *hw, u32 *fw_version, bool is_pf, unsigned long reset_pending); - +void hclge_comm_cmd_init_ops(struct hclge_comm_hw *hw, + const struct hclge_comm_cmq_ops *ops); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ff6a2ed23ddb..eb31a7e9c8fc 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -27,6 +27,8 @@ #include "hclge_devlink.h" #include "hclge_comm_cmd.h" +#include "hclge_trace.h" + #define HCLGE_NAME "hclge" #define HCLGE_BUF_SIZE_UNIT 256U @@ -391,6 +393,48 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) return hclge_comm_cmd_send(&hw->hw, desc, num); } +static void hclge_trace_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_special) +{ + int i; + + trace_hclge_pf_cmd_send(hw, desc, 0, num); + + if (!is_special) { + for (i = 1; i < num; i++) + trace_hclge_pf_cmd_send(hw, &desc[i], i, num); + } else { + for (i = 1; i < num; i++) + trace_hclge_pf_special_cmd_send(hw, (__le32 *)&desc[i], + i, num); + } +} + +static void hclge_trace_cmd_get(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_special) +{ + int i; + + if (!HCLGE_COMM_SEND_SYNC(le16_to_cpu(desc->flag))) + return; + + trace_hclge_pf_cmd_get(hw, desc, 0, num); + + if (!is_special) { + for (i = 1; i < num; i++) + trace_hclge_pf_cmd_get(hw, &desc[i], i, num); + } else { + for (i = 1; i < num; i++) + trace_hclge_pf_special_cmd_get(hw, (__le32 *)&desc[i], + i, num); + } +} + +static const struct hclge_comm_cmq_ops hclge_cmq_ops = { + .trace_cmd_send = hclge_trace_cmd_send, + .trace_cmd_get = hclge_trace_cmd_get, +}; + static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 @@ -11634,6 +11678,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_devlink_uninit; /* Firmware command initialize */ + hclge_comm_cmd_init_ops(&hdev->hw.hw, &hclge_cmq_ops); ret = hclge_comm_cmd_init(hdev->ae_dev, &hdev->hw.hw, &hdev->fw_version, true, hdev->reset_pending); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h index f3cd5a376eca..7e47f0c21d88 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h @@ -10,6 +10,7 @@ #include +#define PF_DESC_LEN (sizeof(struct hclge_desc) / sizeof(u32)) #define PF_GET_MBX_LEN (sizeof(struct hclge_mbx_vf_to_pf_cmd) / sizeof(u32)) #define PF_SEND_MBX_LEN (sizeof(struct hclge_mbx_pf_to_vf_cmd) / sizeof(u32)) @@ -77,6 +78,99 @@ TRACE_EVENT(hclge_pf_mbx_send, ) ); +DECLARE_EVENT_CLASS(hclge_pf_cmd_template, + TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num), + + TP_STRUCT__entry(__field(u16, opcode) + __field(u16, flag) + __field(u16, retval) + __field(u16, rsv) + __field(int, index) + __field(int, num) + __string(pciname, pci_name(hw->cmq.csq.pdev)) + __array(u32, data, HCLGE_DESC_DATA_LEN)), + + TP_fast_assign(int i; + __entry->opcode = le16_to_cpu(desc->opcode); + __entry->flag = le16_to_cpu(desc->flag); + __entry->retval = le16_to_cpu(desc->retval); + __entry->rsv = le16_to_cpu(desc->rsv); + __entry->index = index; + __entry->num = num; + __assign_str(pciname, pci_name(hw->cmq.csq.pdev)); + for (i = 0; i < HCLGE_DESC_DATA_LEN; i++) + __entry->data[i] = le32_to_cpu(desc->data[i]);), + + TP_printk("%s opcode:0x%04x %d-%d flag:0x%04x retval:0x%04x rsv:0x%04x data:%s", + __get_str(pciname), __entry->opcode, + __entry->index, __entry->num, + __entry->flag, __entry->retval, __entry->rsv, + __print_array(__entry->data, + HCLGE_DESC_DATA_LEN, sizeof(u32))) +); + +DEFINE_EVENT(hclge_pf_cmd_template, hclge_pf_cmd_send, 
+ TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num) +); + +DEFINE_EVENT(hclge_pf_cmd_template, hclge_pf_cmd_get, + TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num) +); + +DECLARE_EVENT_CLASS(hclge_pf_special_cmd_template, + TP_PROTO(struct hclge_comm_hw *hw, + __le32 *data, + int index, + int num), + TP_ARGS(hw, data, index, num), + + TP_STRUCT__entry(__field(int, index) + __field(int, num) + __string(pciname, pci_name(hw->cmq.csq.pdev)) + __array(u32, data, PF_DESC_LEN)), + + TP_fast_assign(int i; + __entry->index = index; + __entry->num = num; + __assign_str(pciname, pci_name(hw->cmq.csq.pdev)); + for (i = 0; i < PF_DESC_LEN; i++) + __entry->data[i] = le32_to_cpu(data[i]); + ), + + TP_printk("%s %d-%d data:%s", + __get_str(pciname), + __entry->index, __entry->num, + __print_array(__entry->data, + PF_DESC_LEN, sizeof(u32))) +); + +DEFINE_EVENT(hclge_pf_special_cmd_template, hclge_pf_special_cmd_send, + TP_PROTO(struct hclge_comm_hw *hw, + __le32 *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num)); + +DEFINE_EVENT(hclge_pf_special_cmd_template, hclge_pf_special_cmd_get, + TP_PROTO(struct hclge_comm_hw *hw, + __le32 *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num) +); + #endif /* _HCLGE_TRACE_H_ */ /* This must be outside ifdef _HCLGE_TRACE_H */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 0aa9beefd1c7..ecc092555362 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -11,6 +11,7 @@ #include "hnae3.h" #include "hclgevf_devlink.h" #include "hclge_comm_rss.h" +#include "hclgevf_trace.h" #define HCLGEVF_NAME "hclgevf" @@ -47,6 +48,42 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclge_desc *desc, int num) return hclge_comm_cmd_send(&hw->hw, desc, num); } +static void hclgevf_trace_cmd_send(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_special) +{ + int i; + + trace_hclge_vf_cmd_send(hw, desc, 0, num); + + if (is_special) + return; + + for (i = 1; i < num; i++) + trace_hclge_vf_cmd_send(hw, &desc[i], i, num); +} + +static void hclgevf_trace_cmd_get(struct hclge_comm_hw *hw, struct hclge_desc *desc, + int num, bool is_special) +{ + int i; + + if (!HCLGE_COMM_SEND_SYNC(le16_to_cpu(desc->flag))) + return; + + trace_hclge_vf_cmd_get(hw, desc, 0, num); + + if (is_special) + return; + + for (i = 1; i < num; i++) + trace_hclge_vf_cmd_get(hw, &desc[i], i, num); +} + +static const struct hclge_comm_cmq_ops hclgevf_cmq_ops = { + .trace_cmd_send = hclgevf_trace_cmd_send, + .trace_cmd_get = hclgevf_trace_cmd_get, +}; + void hclgevf_arq_init(struct hclgevf_dev *hdev) { struct hclge_comm_cmq *cmdq = &hdev->hw.hw.cmq; @@ -2796,6 +2833,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) } hclgevf_arq_init(hdev); + ret = hclge_comm_cmd_init(hdev->ae_dev, &hdev->hw.hw, &hdev->fw_version, false, hdev->reset_pending); @@ -2854,6 +2892,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_cmd_queue_init; hclgevf_arq_init(hdev); + + hclge_comm_cmd_init_ops(&hdev->hw.hw, &hclgevf_cmq_ops); ret = hclge_comm_cmd_init(hdev->ae_dev, &hdev->hw.hw, &hdev->fw_version, false, hdev->reset_pending); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h 
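The rationale is the usual one for translation-unit-private data: a const table defined in a header is instantiated in every file that includes it, while defining it as static const in the single .c file that uses it keeps one copy and keeps the header an interface. The sketch below is a generic illustration (a hypothetical debug_regs.h prototype followed by its .c definitions in one fragment), not the hns3 code; only the register name strings are taken from the table above.

/* debug_regs.h: interface only, no data definitions */
void demo_dump_regs(void);

/* debug_regs.c: the table lives next to its only user */
#include <stdio.h>

static const char * const demo_reg_names[] = {
    "BP_CPU_STATE",
    "DFX_MSIX_INFO_NIC_0",
    "DFX_MSIX_INFO_NIC_1",
};

void demo_dump_regs(void)
{
    for (size_t i = 0; i < sizeof(demo_reg_names) / sizeof(demo_reg_names[0]); i++)
        printf("%s\n", demo_reg_names[i]);
}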
index b259e95dd53c..e2e3a2602b6a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h @@ -77,6 +77,56 @@ TRACE_EVENT(hclge_vf_mbx_send, ) ); +DECLARE_EVENT_CLASS(hclge_vf_cmd_template, + TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + + TP_ARGS(hw, desc, index, num), + + TP_STRUCT__entry(__field(u16, opcode) + __field(u16, flag) + __field(u16, retval) + __field(u16, rsv) + __field(int, index) + __field(int, num) + __string(pciname, pci_name(hw->cmq.csq.pdev)) + __array(u32, data, HCLGE_DESC_DATA_LEN)), + + TP_fast_assign(int i; + __entry->opcode = le16_to_cpu(desc->opcode); + __entry->flag = le16_to_cpu(desc->flag); + __entry->retval = le16_to_cpu(desc->retval); + __entry->rsv = le16_to_cpu(desc->rsv); + __entry->index = index; + __entry->num = num; + __assign_str(pciname, pci_name(hw->cmq.csq.pdev)); + for (i = 0; i < HCLGE_DESC_DATA_LEN; i++) + __entry->data[i] = le32_to_cpu(desc->data[i]);), + + TP_printk("%s opcode:0x%04x %d-%d flag:0x%04x retval:0x%04x rsv:0x%04x data:%s", + __get_str(pciname), __entry->opcode, + __entry->index, __entry->num, + __entry->flag, __entry->retval, __entry->rsv, + __print_array(__entry->data, + HCLGE_DESC_DATA_LEN, sizeof(u32))) +); + +DEFINE_EVENT(hclge_vf_cmd_template, hclge_vf_cmd_send, + TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num)); + +DEFINE_EVENT(hclge_vf_cmd_template, hclge_vf_cmd_get, + TP_PROTO(struct hclge_comm_hw *hw, + struct hclge_desc *desc, + int index, + int num), + TP_ARGS(hw, desc, index, num)); + #endif /* _HCLGEVF_TRACE_H_ */ /* This must be outside ifdef _HCLGEVF_TRACE_H */ -- cgit v1.2.3 From b20250afcfb4d3b94ae520ae7afed591ebe5cb5b Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 10 Apr 2024 20:53:52 +0800 Subject: net: hns3: move constants from hclge_debugfs.h to hclge_debugfs.c some constants are defined in hclge_debugfs.h, but only used in hclge_debugfs.c. so move them from hclge_debugfs.h to hclge_debugfs.c. 
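The usual motivation for this kind of move is that a static const table defined in a header gets a separate internal-linkage copy in every C file that includes the header, even though only one file ever reads it. The following is a minimal, self-contained sketch of the "after" layout; the file and symbol names are hypothetical, not the driver's own:

/* Hypothetical example, not hns3 code: the table that used to live in
 * dbg_tbl.h is now defined with internal linkage in its only user,
 * dbg_tbl.c.  Files that include dbg_tbl.h for the shared struct no
 * longer carry an unused private copy of the data.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct dbg_msg {
	bool flag;		/* entry is valid */
	const char *message;	/* register name */
};

static const struct dbg_msg dbg_reg_tbl[] = {
	{ false, "Reserved" },
	{ true,  "BP_CPU_STATE" },
	{ true,  "DFX_MSIX_INFO_NIC_0" },
};

int main(void)
{
	size_t i;

	for (i = 0; i < sizeof(dbg_reg_tbl) / sizeof(dbg_reg_tbl[0]); i++)
		printf("%d %s\n", dbg_reg_tbl[i].flag, dbg_reg_tbl[i].message);
	return 0;
}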
Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240410125354.2177067-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 640 +++++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h | 640 --------------------- 2 files changed, 640 insertions(+), 640 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 9ec471ced3d6..243be19a7557 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -18,6 +18,646 @@ static const char * const hclge_mac_state_str[] = { static const char * const tc_map_mode_str[] = { "PRIO", "DSCP" }; +static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { + {false, "Reserved"}, + {true, "BP_CPU_STATE"}, + {true, "DFX_MSIX_INFO_NIC_0"}, + {true, "DFX_MSIX_INFO_NIC_1"}, + {true, "DFX_MSIX_INFO_NIC_2"}, + {true, "DFX_MSIX_INFO_NIC_3"}, + + {true, "DFX_MSIX_INFO_ROC_0"}, + {true, "DFX_MSIX_INFO_ROC_1"}, + {true, "DFX_MSIX_INFO_ROC_2"}, + {true, "DFX_MSIX_INFO_ROC_3"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = { + {false, "Reserved"}, + {true, "SSU_ETS_PORT_STATUS"}, + {true, "SSU_ETS_TCG_STATUS"}, + {false, "Reserved"}, + {false, "Reserved"}, + {true, "SSU_BP_STATUS_0"}, + + {true, "SSU_BP_STATUS_1"}, + {true, "SSU_BP_STATUS_2"}, + {true, "SSU_BP_STATUS_3"}, + {true, "SSU_BP_STATUS_4"}, + {true, "SSU_BP_STATUS_5"}, + {true, "SSU_MAC_TX_PFC_IND"}, + + {true, "MAC_SSU_RX_PFC_IND"}, + {true, "BTMP_AGEING_ST_B0"}, + {true, "BTMP_AGEING_ST_B1"}, + {true, "BTMP_AGEING_ST_B2"}, + {false, "Reserved"}, + {false, "Reserved"}, + + {true, "FULL_DROP_NUM"}, + {true, "PART_DROP_NUM"}, + {true, "PPP_KEY_DROP_NUM"}, + {true, "PPP_RLT_DROP_NUM"}, + {true, "LO_PRI_UNICAST_RLT_DROP_NUM"}, + {true, "HI_PRI_MULTICAST_RLT_DROP_NUM"}, + + {true, "LO_PRI_MULTICAST_RLT_DROP_NUM"}, + {true, "NCSI_PACKET_CURR_BUFFER_CNT"}, + {true, "BTMP_AGEING_RLS_CNT_BANK0"}, + {true, "BTMP_AGEING_RLS_CNT_BANK1"}, + {true, "BTMP_AGEING_RLS_CNT_BANK2"}, + {true, "SSU_MB_RD_RLT_DROP_CNT"}, + + {true, "SSU_PPP_MAC_KEY_NUM_L"}, + {true, "SSU_PPP_MAC_KEY_NUM_H"}, + {true, "SSU_PPP_HOST_KEY_NUM_L"}, + {true, "SSU_PPP_HOST_KEY_NUM_H"}, + {true, "PPP_SSU_MAC_RLT_NUM_L"}, + {true, "PPP_SSU_MAC_RLT_NUM_H"}, + + {true, "PPP_SSU_HOST_RLT_NUM_L"}, + {true, "PPP_SSU_HOST_RLT_NUM_H"}, + {true, "NCSI_RX_PACKET_IN_CNT_L"}, + {true, "NCSI_RX_PACKET_IN_CNT_H"}, + {true, "NCSI_TX_PACKET_OUT_CNT_L"}, + {true, "NCSI_TX_PACKET_OUT_CNT_H"}, + + {true, "SSU_KEY_DROP_NUM"}, + {true, "MB_UNCOPY_NUM"}, + {true, "RX_OQ_DROP_PKT_CNT"}, + {true, "TX_OQ_DROP_PKT_CNT"}, + {true, "BANK_UNBALANCE_DROP_CNT"}, + {true, "BANK_UNBALANCE_RX_DROP_CNT"}, + + {true, "NIC_L2_ERR_DROP_PKT_CNT"}, + {true, "ROC_L2_ERR_DROP_PKT_CNT"}, + {true, "NIC_L2_ERR_DROP_PKT_CNT_RX"}, + {true, "ROC_L2_ERR_DROP_PKT_CNT_RX"}, + {true, "RX_OQ_GLB_DROP_PKT_CNT"}, + {false, "Reserved"}, + + {true, "LO_PRI_UNICAST_CUR_CNT"}, + {true, "HI_PRI_MULTICAST_CUR_CNT"}, + {true, "LO_PRI_MULTICAST_CUR_CNT"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = { + {true, "prt_id"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_0"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_1"}, + {true, 
"PACKET_TC_CURR_BUFFER_CNT_2"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_3"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_4"}, + + {true, "PACKET_TC_CURR_BUFFER_CNT_5"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_6"}, + {true, "PACKET_TC_CURR_BUFFER_CNT_7"}, + {true, "PACKET_CURR_BUFFER_CNT"}, + {false, "Reserved"}, + {false, "Reserved"}, + + {true, "RX_PACKET_IN_CNT_L"}, + {true, "RX_PACKET_IN_CNT_H"}, + {true, "RX_PACKET_OUT_CNT_L"}, + {true, "RX_PACKET_OUT_CNT_H"}, + {true, "TX_PACKET_IN_CNT_L"}, + {true, "TX_PACKET_IN_CNT_H"}, + + {true, "TX_PACKET_OUT_CNT_L"}, + {true, "TX_PACKET_OUT_CNT_H"}, + {true, "ROC_RX_PACKET_IN_CNT_L"}, + {true, "ROC_RX_PACKET_IN_CNT_H"}, + {true, "ROC_TX_PACKET_OUT_CNT_L"}, + {true, "ROC_TX_PACKET_OUT_CNT_H"}, + + {true, "RX_PACKET_TC_IN_CNT_0_L"}, + {true, "RX_PACKET_TC_IN_CNT_0_H"}, + {true, "RX_PACKET_TC_IN_CNT_1_L"}, + {true, "RX_PACKET_TC_IN_CNT_1_H"}, + {true, "RX_PACKET_TC_IN_CNT_2_L"}, + {true, "RX_PACKET_TC_IN_CNT_2_H"}, + + {true, "RX_PACKET_TC_IN_CNT_3_L"}, + {true, "RX_PACKET_TC_IN_CNT_3_H"}, + {true, "RX_PACKET_TC_IN_CNT_4_L"}, + {true, "RX_PACKET_TC_IN_CNT_4_H"}, + {true, "RX_PACKET_TC_IN_CNT_5_L"}, + {true, "RX_PACKET_TC_IN_CNT_5_H"}, + + {true, "RX_PACKET_TC_IN_CNT_6_L"}, + {true, "RX_PACKET_TC_IN_CNT_6_H"}, + {true, "RX_PACKET_TC_IN_CNT_7_L"}, + {true, "RX_PACKET_TC_IN_CNT_7_H"}, + {true, "RX_PACKET_TC_OUT_CNT_0_L"}, + {true, "RX_PACKET_TC_OUT_CNT_0_H"}, + + {true, "RX_PACKET_TC_OUT_CNT_1_L"}, + {true, "RX_PACKET_TC_OUT_CNT_1_H"}, + {true, "RX_PACKET_TC_OUT_CNT_2_L"}, + {true, "RX_PACKET_TC_OUT_CNT_2_H"}, + {true, "RX_PACKET_TC_OUT_CNT_3_L"}, + {true, "RX_PACKET_TC_OUT_CNT_3_H"}, + + {true, "RX_PACKET_TC_OUT_CNT_4_L"}, + {true, "RX_PACKET_TC_OUT_CNT_4_H"}, + {true, "RX_PACKET_TC_OUT_CNT_5_L"}, + {true, "RX_PACKET_TC_OUT_CNT_5_H"}, + {true, "RX_PACKET_TC_OUT_CNT_6_L"}, + {true, "RX_PACKET_TC_OUT_CNT_6_H"}, + + {true, "RX_PACKET_TC_OUT_CNT_7_L"}, + {true, "RX_PACKET_TC_OUT_CNT_7_H"}, + {true, "TX_PACKET_TC_IN_CNT_0_L"}, + {true, "TX_PACKET_TC_IN_CNT_0_H"}, + {true, "TX_PACKET_TC_IN_CNT_1_L"}, + {true, "TX_PACKET_TC_IN_CNT_1_H"}, + + {true, "TX_PACKET_TC_IN_CNT_2_L"}, + {true, "TX_PACKET_TC_IN_CNT_2_H"}, + {true, "TX_PACKET_TC_IN_CNT_3_L"}, + {true, "TX_PACKET_TC_IN_CNT_3_H"}, + {true, "TX_PACKET_TC_IN_CNT_4_L"}, + {true, "TX_PACKET_TC_IN_CNT_4_H"}, + + {true, "TX_PACKET_TC_IN_CNT_5_L"}, + {true, "TX_PACKET_TC_IN_CNT_5_H"}, + {true, "TX_PACKET_TC_IN_CNT_6_L"}, + {true, "TX_PACKET_TC_IN_CNT_6_H"}, + {true, "TX_PACKET_TC_IN_CNT_7_L"}, + {true, "TX_PACKET_TC_IN_CNT_7_H"}, + + {true, "TX_PACKET_TC_OUT_CNT_0_L"}, + {true, "TX_PACKET_TC_OUT_CNT_0_H"}, + {true, "TX_PACKET_TC_OUT_CNT_1_L"}, + {true, "TX_PACKET_TC_OUT_CNT_1_H"}, + {true, "TX_PACKET_TC_OUT_CNT_2_L"}, + {true, "TX_PACKET_TC_OUT_CNT_2_H"}, + + {true, "TX_PACKET_TC_OUT_CNT_3_L"}, + {true, "TX_PACKET_TC_OUT_CNT_3_H"}, + {true, "TX_PACKET_TC_OUT_CNT_4_L"}, + {true, "TX_PACKET_TC_OUT_CNT_4_H"}, + {true, "TX_PACKET_TC_OUT_CNT_5_L"}, + {true, "TX_PACKET_TC_OUT_CNT_5_H"}, + + {true, "TX_PACKET_TC_OUT_CNT_6_L"}, + {true, "TX_PACKET_TC_OUT_CNT_6_H"}, + {true, "TX_PACKET_TC_OUT_CNT_7_L"}, + {true, "TX_PACKET_TC_OUT_CNT_7_H"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = { + {true, "OQ_INDEX"}, + {true, "QUEUE_CNT"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = { + {true, "prt_id"}, + {true, "IGU_RX_ERR_PKT"}, + {true, 
"IGU_RX_NO_SOF_PKT"}, + {true, "EGU_TX_1588_SHORT_PKT"}, + {true, "EGU_TX_1588_PKT"}, + {true, "EGU_TX_ERR_PKT"}, + + {true, "IGU_RX_OUT_L2_PKT"}, + {true, "IGU_RX_OUT_L3_PKT"}, + {true, "IGU_RX_OUT_L4_PKT"}, + {true, "IGU_RX_IN_L2_PKT"}, + {true, "IGU_RX_IN_L3_PKT"}, + {true, "IGU_RX_IN_L4_PKT"}, + + {true, "IGU_RX_EL3E_PKT"}, + {true, "IGU_RX_EL4E_PKT"}, + {true, "IGU_RX_L3E_PKT"}, + {true, "IGU_RX_L4E_PKT"}, + {true, "IGU_RX_ROCEE_PKT"}, + {true, "IGU_RX_OUT_UDP0_PKT"}, + + {true, "IGU_RX_IN_UDP0_PKT"}, + {true, "IGU_MC_CAR_DROP_PKT_L"}, + {true, "IGU_MC_CAR_DROP_PKT_H"}, + {true, "IGU_BC_CAR_DROP_PKT_L"}, + {true, "IGU_BC_CAR_DROP_PKT_H"}, + {false, "Reserved"}, + + {true, "IGU_RX_OVERSIZE_PKT_L"}, + {true, "IGU_RX_OVERSIZE_PKT_H"}, + {true, "IGU_RX_UNDERSIZE_PKT_L"}, + {true, "IGU_RX_UNDERSIZE_PKT_H"}, + {true, "IGU_RX_OUT_ALL_PKT_L"}, + {true, "IGU_RX_OUT_ALL_PKT_H"}, + + {true, "IGU_TX_OUT_ALL_PKT_L"}, + {true, "IGU_TX_OUT_ALL_PKT_H"}, + {true, "IGU_RX_UNI_PKT_L"}, + {true, "IGU_RX_UNI_PKT_H"}, + {true, "IGU_RX_MULTI_PKT_L"}, + {true, "IGU_RX_MULTI_PKT_H"}, + + {true, "IGU_RX_BROAD_PKT_L"}, + {true, "IGU_RX_BROAD_PKT_H"}, + {true, "EGU_TX_OUT_ALL_PKT_L"}, + {true, "EGU_TX_OUT_ALL_PKT_H"}, + {true, "EGU_TX_UNI_PKT_L"}, + {true, "EGU_TX_UNI_PKT_H"}, + + {true, "EGU_TX_MULTI_PKT_L"}, + {true, "EGU_TX_MULTI_PKT_H"}, + {true, "EGU_TX_BROAD_PKT_L"}, + {true, "EGU_TX_BROAD_PKT_H"}, + {true, "IGU_TX_KEY_NUM_L"}, + {true, "IGU_TX_KEY_NUM_H"}, + + {true, "IGU_RX_NON_TUN_PKT_L"}, + {true, "IGU_RX_NON_TUN_PKT_H"}, + {true, "IGU_RX_TUN_PKT_L"}, + {true, "IGU_RX_TUN_PKT_H"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = { + {true, "tc_queue_num"}, + {true, "FSM_DFX_ST0"}, + {true, "FSM_DFX_ST1"}, + {true, "RPU_RX_PKT_DROP_CNT"}, + {true, "BUF_WAIT_TIMEOUT"}, + {true, "BUF_WAIT_TIMEOUT_QID"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = { + {false, "Reserved"}, + {true, "FIFO_DFX_ST0"}, + {true, "FIFO_DFX_ST1"}, + {true, "FIFO_DFX_ST2"}, + {true, "FIFO_DFX_ST3"}, + {true, "FIFO_DFX_ST4"}, + + {true, "FIFO_DFX_ST5"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = { + {false, "Reserved"}, + {true, "NCSI_EGU_TX_FIFO_STS"}, + {true, "NCSI_PAUSE_STATUS"}, + {true, "NCSI_RX_CTRL_DMAC_ERR_CNT"}, + {true, "NCSI_RX_CTRL_SMAC_ERR_CNT"}, + {true, "NCSI_RX_CTRL_CKS_ERR_CNT"}, + + {true, "NCSI_RX_CTRL_PKT_CNT"}, + {true, "NCSI_RX_PT_DMAC_ERR_CNT"}, + {true, "NCSI_RX_PT_SMAC_ERR_CNT"}, + {true, "NCSI_RX_PT_PKT_CNT"}, + {true, "NCSI_RX_FCS_ERR_CNT"}, + {true, "NCSI_TX_CTRL_DMAC_ERR_CNT"}, + + {true, "NCSI_TX_CTRL_SMAC_ERR_CNT"}, + {true, "NCSI_TX_CTRL_PKT_CNT"}, + {true, "NCSI_TX_PT_DMAC_ERR_CNT"}, + {true, "NCSI_TX_PT_SMAC_ERR_CNT"}, + {true, "NCSI_TX_PT_PKT_CNT"}, + {true, "NCSI_TX_PT_PKT_TRUNC_CNT"}, + + {true, "NCSI_TX_PT_PKT_ERR_CNT"}, + {true, "NCSI_TX_CTRL_PKT_ERR_CNT"}, + {true, "NCSI_RX_CTRL_PKT_TRUNC_CNT"}, + {true, "NCSI_RX_CTRL_PKT_CFLIT_CNT"}, + {false, "Reserved"}, + {false, "Reserved"}, + + {true, "NCSI_MAC_RX_OCTETS_OK"}, + {true, "NCSI_MAC_RX_OCTETS_BAD"}, + {true, "NCSI_MAC_RX_UC_PKTS"}, + {true, "NCSI_MAC_RX_MC_PKTS"}, + {true, "NCSI_MAC_RX_BC_PKTS"}, + {true, "NCSI_MAC_RX_PKTS_64OCTETS"}, + + {true, "NCSI_MAC_RX_PKTS_65TO127OCTETS"}, + {true, "NCSI_MAC_RX_PKTS_128TO255OCTETS"}, + {true, "NCSI_MAC_RX_PKTS_255TO511OCTETS"}, + {true, 
"NCSI_MAC_RX_PKTS_512TO1023OCTETS"}, + {true, "NCSI_MAC_RX_PKTS_1024TO1518OCTETS"}, + {true, "NCSI_MAC_RX_PKTS_1519TOMAXOCTETS"}, + + {true, "NCSI_MAC_RX_FCS_ERRORS"}, + {true, "NCSI_MAC_RX_LONG_ERRORS"}, + {true, "NCSI_MAC_RX_JABBER_ERRORS"}, + {true, "NCSI_MAC_RX_RUNT_ERR_CNT"}, + {true, "NCSI_MAC_RX_SHORT_ERR_CNT"}, + {true, "NCSI_MAC_RX_FILT_PKT_CNT"}, + + {true, "NCSI_MAC_RX_OCTETS_TOTAL_FILT"}, + {true, "NCSI_MAC_TX_OCTETS_OK"}, + {true, "NCSI_MAC_TX_OCTETS_BAD"}, + {true, "NCSI_MAC_TX_UC_PKTS"}, + {true, "NCSI_MAC_TX_MC_PKTS"}, + {true, "NCSI_MAC_TX_BC_PKTS"}, + + {true, "NCSI_MAC_TX_PKTS_64OCTETS"}, + {true, "NCSI_MAC_TX_PKTS_65TO127OCTETS"}, + {true, "NCSI_MAC_TX_PKTS_128TO255OCTETS"}, + {true, "NCSI_MAC_TX_PKTS_256TO511OCTETS"}, + {true, "NCSI_MAC_TX_PKTS_512TO1023OCTETS"}, + {true, "NCSI_MAC_TX_PKTS_1024TO1518OCTETS"}, + + {true, "NCSI_MAC_TX_PKTS_1519TOMAXOCTETS"}, + {true, "NCSI_MAC_TX_UNDERRUN"}, + {true, "NCSI_MAC_TX_CRC_ERROR"}, + {true, "NCSI_MAC_TX_PAUSE_FRAMES"}, + {true, "NCSI_MAC_RX_PAD_PKTS"}, + {true, "NCSI_MAC_RX_PAUSE_FRAMES"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = { + {false, "Reserved"}, + {true, "LGE_IGU_AFIFO_DFX_0"}, + {true, "LGE_IGU_AFIFO_DFX_1"}, + {true, "LGE_IGU_AFIFO_DFX_2"}, + {true, "LGE_IGU_AFIFO_DFX_3"}, + {true, "LGE_IGU_AFIFO_DFX_4"}, + + {true, "LGE_IGU_AFIFO_DFX_5"}, + {true, "LGE_IGU_AFIFO_DFX_6"}, + {true, "LGE_IGU_AFIFO_DFX_7"}, + {true, "LGE_EGU_AFIFO_DFX_0"}, + {true, "LGE_EGU_AFIFO_DFX_1"}, + {true, "LGE_EGU_AFIFO_DFX_2"}, + + {true, "LGE_EGU_AFIFO_DFX_3"}, + {true, "LGE_EGU_AFIFO_DFX_4"}, + {true, "LGE_EGU_AFIFO_DFX_5"}, + {true, "LGE_EGU_AFIFO_DFX_6"}, + {true, "LGE_EGU_AFIFO_DFX_7"}, + {true, "CGE_IGU_AFIFO_DFX_0"}, + + {true, "CGE_IGU_AFIFO_DFX_1"}, + {true, "CGE_EGU_AFIFO_DFX_0"}, + {true, "CGE_EGU_AFIFO_DFX_1"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = { + {false, "Reserved"}, + {true, "DROP_FROM_PRT_PKT_CNT"}, + {true, "DROP_FROM_HOST_PKT_CNT"}, + {true, "DROP_TX_VLAN_PROC_CNT"}, + {true, "DROP_MNG_CNT"}, + {true, "DROP_FD_CNT"}, + + {true, "DROP_NO_DST_CNT"}, + {true, "DROP_MC_MBID_FULL_CNT"}, + {true, "DROP_SC_FILTERED"}, + {true, "PPP_MC_DROP_PKT_CNT"}, + {true, "DROP_PT_CNT"}, + {true, "DROP_MAC_ANTI_SPOOF_CNT"}, + + {true, "DROP_IG_VFV_CNT"}, + {true, "DROP_IG_PRTV_CNT"}, + {true, "DROP_CNM_PFC_PAUSE_CNT"}, + {true, "DROP_TORUS_TC_CNT"}, + {true, "DROP_TORUS_LPBK_CNT"}, + {true, "PPP_HFS_STS"}, + + {true, "PPP_MC_RSLT_STS"}, + {true, "PPP_P3U_STS"}, + {true, "PPP_RSLT_DESCR_STS"}, + {true, "PPP_UMV_STS_0"}, + {true, "PPP_UMV_STS_1"}, + {true, "PPP_VFV_STS"}, + + {true, "PPP_GRO_KEY_CNT"}, + {true, "PPP_GRO_INFO_CNT"}, + {true, "PPP_GRO_DROP_CNT"}, + {true, "PPP_GRO_OUT_CNT"}, + {true, "PPP_GRO_KEY_MATCH_DATA_CNT"}, + {true, "PPP_GRO_KEY_MATCH_TCAM_CNT"}, + + {true, "PPP_GRO_INFO_MATCH_CNT"}, + {true, "PPP_GRO_FREE_ENTRY_CNT"}, + {true, "PPP_GRO_INNER_DFX_SIGNAL"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + + {true, "GET_RX_PKT_CNT_L"}, + {true, "GET_RX_PKT_CNT_H"}, + {true, "GET_TX_PKT_CNT_L"}, + {true, "GET_TX_PKT_CNT_H"}, + {true, "SEND_UC_PRT2HOST_PKT_CNT_L"}, + {true, "SEND_UC_PRT2HOST_PKT_CNT_H"}, + + {true, "SEND_UC_PRT2PRT_PKT_CNT_L"}, + {true, "SEND_UC_PRT2PRT_PKT_CNT_H"}, + {true, "SEND_UC_HOST2HOST_PKT_CNT_L"}, + {true, "SEND_UC_HOST2HOST_PKT_CNT_H"}, + {true, "SEND_UC_HOST2PRT_PKT_CNT_L"}, + {true, "SEND_UC_HOST2PRT_PKT_CNT_H"}, + + {true, 
"SEND_MC_FROM_PRT_CNT_L"}, + {true, "SEND_MC_FROM_PRT_CNT_H"}, + {true, "SEND_MC_FROM_HOST_CNT_L"}, + {true, "SEND_MC_FROM_HOST_CNT_H"}, + {true, "SSU_MC_RD_CNT_L"}, + {true, "SSU_MC_RD_CNT_H"}, + + {true, "SSU_MC_DROP_CNT_L"}, + {true, "SSU_MC_DROP_CNT_H"}, + {true, "SSU_MC_RD_PKT_CNT_L"}, + {true, "SSU_MC_RD_PKT_CNT_H"}, + {true, "PPP_MC_2HOST_PKT_CNT_L"}, + {true, "PPP_MC_2HOST_PKT_CNT_H"}, + + {true, "PPP_MC_2PRT_PKT_CNT_L"}, + {true, "PPP_MC_2PRT_PKT_CNT_H"}, + {true, "NTSNOS_PKT_CNT_L"}, + {true, "NTSNOS_PKT_CNT_H"}, + {true, "NTUP_PKT_CNT_L"}, + {true, "NTUP_PKT_CNT_H"}, + + {true, "NTLCL_PKT_CNT_L"}, + {true, "NTLCL_PKT_CNT_H"}, + {true, "NTTGT_PKT_CNT_L"}, + {true, "NTTGT_PKT_CNT_H"}, + {true, "RTNS_PKT_CNT_L"}, + {true, "RTNS_PKT_CNT_H"}, + + {true, "RTLPBK_PKT_CNT_L"}, + {true, "RTLPBK_PKT_CNT_H"}, + {true, "NR_PKT_CNT_L"}, + {true, "NR_PKT_CNT_H"}, + {true, "RR_PKT_CNT_L"}, + {true, "RR_PKT_CNT_H"}, + + {true, "MNG_TBL_HIT_CNT_L"}, + {true, "MNG_TBL_HIT_CNT_H"}, + {true, "FD_TBL_HIT_CNT_L"}, + {true, "FD_TBL_HIT_CNT_H"}, + {true, "FD_LKUP_CNT_L"}, + {true, "FD_LKUP_CNT_H"}, + + {true, "BC_HIT_CNT_L"}, + {true, "BC_HIT_CNT_H"}, + {true, "UM_TBL_UC_HIT_CNT_L"}, + {true, "UM_TBL_UC_HIT_CNT_H"}, + {true, "UM_TBL_MC_HIT_CNT_L"}, + {true, "UM_TBL_MC_HIT_CNT_H"}, + + {true, "UM_TBL_VMDQ1_HIT_CNT_L"}, + {true, "UM_TBL_VMDQ1_HIT_CNT_H"}, + {true, "MTA_TBL_HIT_CNT_L"}, + {true, "MTA_TBL_HIT_CNT_H"}, + {true, "FWD_BONDING_HIT_CNT_L"}, + {true, "FWD_BONDING_HIT_CNT_H"}, + + {true, "PROMIS_TBL_HIT_CNT_L"}, + {true, "PROMIS_TBL_HIT_CNT_H"}, + {true, "GET_TUNL_PKT_CNT_L"}, + {true, "GET_TUNL_PKT_CNT_H"}, + {true, "GET_BMC_PKT_CNT_L"}, + {true, "GET_BMC_PKT_CNT_H"}, + + {true, "SEND_UC_PRT2BMC_PKT_CNT_L"}, + {true, "SEND_UC_PRT2BMC_PKT_CNT_H"}, + {true, "SEND_UC_HOST2BMC_PKT_CNT_L"}, + {true, "SEND_UC_HOST2BMC_PKT_CNT_H"}, + {true, "SEND_UC_BMC2HOST_PKT_CNT_L"}, + {true, "SEND_UC_BMC2HOST_PKT_CNT_H"}, + + {true, "SEND_UC_BMC2PRT_PKT_CNT_L"}, + {true, "SEND_UC_BMC2PRT_PKT_CNT_H"}, + {true, "PPP_MC_2BMC_PKT_CNT_L"}, + {true, "PPP_MC_2BMC_PKT_CNT_H"}, + {true, "VLAN_MIRR_CNT_L"}, + {true, "VLAN_MIRR_CNT_H"}, + + {true, "IG_MIRR_CNT_L"}, + {true, "IG_MIRR_CNT_H"}, + {true, "EG_MIRR_CNT_L"}, + {true, "EG_MIRR_CNT_H"}, + {true, "RX_DEFAULT_HOST_HIT_CNT_L"}, + {true, "RX_DEFAULT_HOST_HIT_CNT_H"}, + + {true, "LAN_PAIR_CNT_L"}, + {true, "LAN_PAIR_CNT_H"}, + {true, "UM_TBL_MC_HIT_PKT_CNT_L"}, + {true, "UM_TBL_MC_HIT_PKT_CNT_H"}, + {true, "MTA_TBL_HIT_PKT_CNT_L"}, + {true, "MTA_TBL_HIT_PKT_CNT_H"}, + + {true, "PROMIS_TBL_HIT_PKT_CNT_L"}, + {true, "PROMIS_TBL_HIT_PKT_CNT_H"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = { + {false, "Reserved"}, + {true, "FSM_DFX_ST0"}, + {true, "FSM_DFX_ST1"}, + {true, "FSM_DFX_ST2"}, + {true, "FIFO_DFX_ST0"}, + {true, "FIFO_DFX_ST1"}, + + {true, "FIFO_DFX_ST2"}, + {true, "FIFO_DFX_ST3"}, + {true, "FIFO_DFX_ST4"}, + {true, "FIFO_DFX_ST5"}, + {true, "FIFO_DFX_ST6"}, + {true, "FIFO_DFX_ST7"}, + + {true, "FIFO_DFX_ST8"}, + {true, "FIFO_DFX_ST9"}, + {true, "FIFO_DFX_ST10"}, + {true, "FIFO_DFX_ST11"}, + {true, "Q_CREDIT_VLD_0"}, + {true, "Q_CREDIT_VLD_1"}, + + {true, "Q_CREDIT_VLD_2"}, + {true, "Q_CREDIT_VLD_3"}, + {true, "Q_CREDIT_VLD_4"}, + {true, "Q_CREDIT_VLD_5"}, + {true, "Q_CREDIT_VLD_6"}, + {true, "Q_CREDIT_VLD_7"}, + + {true, "Q_CREDIT_VLD_8"}, + {true, "Q_CREDIT_VLD_9"}, + {true, "Q_CREDIT_VLD_10"}, + {true, "Q_CREDIT_VLD_11"}, + {true, 
"Q_CREDIT_VLD_12"}, + {true, "Q_CREDIT_VLD_13"}, + + {true, "Q_CREDIT_VLD_14"}, + {true, "Q_CREDIT_VLD_15"}, + {true, "Q_CREDIT_VLD_16"}, + {true, "Q_CREDIT_VLD_17"}, + {true, "Q_CREDIT_VLD_18"}, + {true, "Q_CREDIT_VLD_19"}, + + {true, "Q_CREDIT_VLD_20"}, + {true, "Q_CREDIT_VLD_21"}, + {true, "Q_CREDIT_VLD_22"}, + {true, "Q_CREDIT_VLD_23"}, + {true, "Q_CREDIT_VLD_24"}, + {true, "Q_CREDIT_VLD_25"}, + + {true, "Q_CREDIT_VLD_26"}, + {true, "Q_CREDIT_VLD_27"}, + {true, "Q_CREDIT_VLD_28"}, + {true, "Q_CREDIT_VLD_29"}, + {true, "Q_CREDIT_VLD_30"}, + {true, "Q_CREDIT_VLD_31"}, + + {true, "GRO_BD_SERR_CNT"}, + {true, "GRO_CONTEXT_SERR_CNT"}, + {true, "RX_STASH_CFG_SERR_CNT"}, + {true, "AXI_RD_FBD_SERR_CNT"}, + {true, "GRO_BD_MERR_CNT"}, + {true, "GRO_CONTEXT_MERR_CNT"}, + + {true, "RX_STASH_CFG_MERR_CNT"}, + {true, "AXI_RD_FBD_MERR_CNT"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, + {false, "Reserved"}, +}; + +static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = { + {true, "q_num"}, + {true, "RCB_CFG_RX_RING_TAIL"}, + {true, "RCB_CFG_RX_RING_HEAD"}, + {true, "RCB_CFG_RX_RING_FBDNUM"}, + {true, "RCB_CFG_RX_RING_OFFSET"}, + {true, "RCB_CFG_RX_RING_FBDOFFSET"}, + + {true, "RCB_CFG_RX_RING_PKTNUM_RECORD"}, + {true, "RCB_CFG_TX_RING_TAIL"}, + {true, "RCB_CFG_TX_RING_HEAD"}, + {true, "RCB_CFG_TX_RING_FBDNUM"}, + {true, "RCB_CFG_TX_RING_OFFSET"}, + {true, "RCB_CFG_TX_RING_EBDNUM"}, +}; + static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = { { .cmd = HNAE3_DBG_CMD_REG_BIOS_COMMON, .dfx_msg = &hclge_dbg_bios_common_reg[0], diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h index 724052928b88..31a775fb032b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h @@ -99,646 +99,6 @@ struct hclge_dbg_status_dfx_info { char message[HCLGE_DBG_MAX_DFX_MSG_LEN]; }; -static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { - {false, "Reserved"}, - {true, "BP_CPU_STATE"}, - {true, "DFX_MSIX_INFO_NIC_0"}, - {true, "DFX_MSIX_INFO_NIC_1"}, - {true, "DFX_MSIX_INFO_NIC_2"}, - {true, "DFX_MSIX_INFO_NIC_3"}, - - {true, "DFX_MSIX_INFO_ROC_0"}, - {true, "DFX_MSIX_INFO_ROC_1"}, - {true, "DFX_MSIX_INFO_ROC_2"}, - {true, "DFX_MSIX_INFO_ROC_3"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = { - {false, "Reserved"}, - {true, "SSU_ETS_PORT_STATUS"}, - {true, "SSU_ETS_TCG_STATUS"}, - {false, "Reserved"}, - {false, "Reserved"}, - {true, "SSU_BP_STATUS_0"}, - - {true, "SSU_BP_STATUS_1"}, - {true, "SSU_BP_STATUS_2"}, - {true, "SSU_BP_STATUS_3"}, - {true, "SSU_BP_STATUS_4"}, - {true, "SSU_BP_STATUS_5"}, - {true, "SSU_MAC_TX_PFC_IND"}, - - {true, "MAC_SSU_RX_PFC_IND"}, - {true, "BTMP_AGEING_ST_B0"}, - {true, "BTMP_AGEING_ST_B1"}, - {true, "BTMP_AGEING_ST_B2"}, - {false, "Reserved"}, - {false, "Reserved"}, - - {true, "FULL_DROP_NUM"}, - {true, "PART_DROP_NUM"}, - {true, "PPP_KEY_DROP_NUM"}, - {true, "PPP_RLT_DROP_NUM"}, - {true, "LO_PRI_UNICAST_RLT_DROP_NUM"}, - {true, "HI_PRI_MULTICAST_RLT_DROP_NUM"}, - - {true, "LO_PRI_MULTICAST_RLT_DROP_NUM"}, - {true, "NCSI_PACKET_CURR_BUFFER_CNT"}, - {true, "BTMP_AGEING_RLS_CNT_BANK0"}, - {true, "BTMP_AGEING_RLS_CNT_BANK1"}, - {true, "BTMP_AGEING_RLS_CNT_BANK2"}, - {true, "SSU_MB_RD_RLT_DROP_CNT"}, - - {true, "SSU_PPP_MAC_KEY_NUM_L"}, - {true, "SSU_PPP_MAC_KEY_NUM_H"}, - {true, 
"SSU_PPP_HOST_KEY_NUM_L"}, - {true, "SSU_PPP_HOST_KEY_NUM_H"}, - {true, "PPP_SSU_MAC_RLT_NUM_L"}, - {true, "PPP_SSU_MAC_RLT_NUM_H"}, - - {true, "PPP_SSU_HOST_RLT_NUM_L"}, - {true, "PPP_SSU_HOST_RLT_NUM_H"}, - {true, "NCSI_RX_PACKET_IN_CNT_L"}, - {true, "NCSI_RX_PACKET_IN_CNT_H"}, - {true, "NCSI_TX_PACKET_OUT_CNT_L"}, - {true, "NCSI_TX_PACKET_OUT_CNT_H"}, - - {true, "SSU_KEY_DROP_NUM"}, - {true, "MB_UNCOPY_NUM"}, - {true, "RX_OQ_DROP_PKT_CNT"}, - {true, "TX_OQ_DROP_PKT_CNT"}, - {true, "BANK_UNBALANCE_DROP_CNT"}, - {true, "BANK_UNBALANCE_RX_DROP_CNT"}, - - {true, "NIC_L2_ERR_DROP_PKT_CNT"}, - {true, "ROC_L2_ERR_DROP_PKT_CNT"}, - {true, "NIC_L2_ERR_DROP_PKT_CNT_RX"}, - {true, "ROC_L2_ERR_DROP_PKT_CNT_RX"}, - {true, "RX_OQ_GLB_DROP_PKT_CNT"}, - {false, "Reserved"}, - - {true, "LO_PRI_UNICAST_CUR_CNT"}, - {true, "HI_PRI_MULTICAST_CUR_CNT"}, - {true, "LO_PRI_MULTICAST_CUR_CNT"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = { - {true, "prt_id"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_0"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_1"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_2"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_3"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_4"}, - - {true, "PACKET_TC_CURR_BUFFER_CNT_5"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_6"}, - {true, "PACKET_TC_CURR_BUFFER_CNT_7"}, - {true, "PACKET_CURR_BUFFER_CNT"}, - {false, "Reserved"}, - {false, "Reserved"}, - - {true, "RX_PACKET_IN_CNT_L"}, - {true, "RX_PACKET_IN_CNT_H"}, - {true, "RX_PACKET_OUT_CNT_L"}, - {true, "RX_PACKET_OUT_CNT_H"}, - {true, "TX_PACKET_IN_CNT_L"}, - {true, "TX_PACKET_IN_CNT_H"}, - - {true, "TX_PACKET_OUT_CNT_L"}, - {true, "TX_PACKET_OUT_CNT_H"}, - {true, "ROC_RX_PACKET_IN_CNT_L"}, - {true, "ROC_RX_PACKET_IN_CNT_H"}, - {true, "ROC_TX_PACKET_OUT_CNT_L"}, - {true, "ROC_TX_PACKET_OUT_CNT_H"}, - - {true, "RX_PACKET_TC_IN_CNT_0_L"}, - {true, "RX_PACKET_TC_IN_CNT_0_H"}, - {true, "RX_PACKET_TC_IN_CNT_1_L"}, - {true, "RX_PACKET_TC_IN_CNT_1_H"}, - {true, "RX_PACKET_TC_IN_CNT_2_L"}, - {true, "RX_PACKET_TC_IN_CNT_2_H"}, - - {true, "RX_PACKET_TC_IN_CNT_3_L"}, - {true, "RX_PACKET_TC_IN_CNT_3_H"}, - {true, "RX_PACKET_TC_IN_CNT_4_L"}, - {true, "RX_PACKET_TC_IN_CNT_4_H"}, - {true, "RX_PACKET_TC_IN_CNT_5_L"}, - {true, "RX_PACKET_TC_IN_CNT_5_H"}, - - {true, "RX_PACKET_TC_IN_CNT_6_L"}, - {true, "RX_PACKET_TC_IN_CNT_6_H"}, - {true, "RX_PACKET_TC_IN_CNT_7_L"}, - {true, "RX_PACKET_TC_IN_CNT_7_H"}, - {true, "RX_PACKET_TC_OUT_CNT_0_L"}, - {true, "RX_PACKET_TC_OUT_CNT_0_H"}, - - {true, "RX_PACKET_TC_OUT_CNT_1_L"}, - {true, "RX_PACKET_TC_OUT_CNT_1_H"}, - {true, "RX_PACKET_TC_OUT_CNT_2_L"}, - {true, "RX_PACKET_TC_OUT_CNT_2_H"}, - {true, "RX_PACKET_TC_OUT_CNT_3_L"}, - {true, "RX_PACKET_TC_OUT_CNT_3_H"}, - - {true, "RX_PACKET_TC_OUT_CNT_4_L"}, - {true, "RX_PACKET_TC_OUT_CNT_4_H"}, - {true, "RX_PACKET_TC_OUT_CNT_5_L"}, - {true, "RX_PACKET_TC_OUT_CNT_5_H"}, - {true, "RX_PACKET_TC_OUT_CNT_6_L"}, - {true, "RX_PACKET_TC_OUT_CNT_6_H"}, - - {true, "RX_PACKET_TC_OUT_CNT_7_L"}, - {true, "RX_PACKET_TC_OUT_CNT_7_H"}, - {true, "TX_PACKET_TC_IN_CNT_0_L"}, - {true, "TX_PACKET_TC_IN_CNT_0_H"}, - {true, "TX_PACKET_TC_IN_CNT_1_L"}, - {true, "TX_PACKET_TC_IN_CNT_1_H"}, - - {true, "TX_PACKET_TC_IN_CNT_2_L"}, - {true, "TX_PACKET_TC_IN_CNT_2_H"}, - {true, "TX_PACKET_TC_IN_CNT_3_L"}, - {true, "TX_PACKET_TC_IN_CNT_3_H"}, - {true, "TX_PACKET_TC_IN_CNT_4_L"}, - {true, "TX_PACKET_TC_IN_CNT_4_H"}, - - {true, "TX_PACKET_TC_IN_CNT_5_L"}, - {true, "TX_PACKET_TC_IN_CNT_5_H"}, - {true, 
"TX_PACKET_TC_IN_CNT_6_L"}, - {true, "TX_PACKET_TC_IN_CNT_6_H"}, - {true, "TX_PACKET_TC_IN_CNT_7_L"}, - {true, "TX_PACKET_TC_IN_CNT_7_H"}, - - {true, "TX_PACKET_TC_OUT_CNT_0_L"}, - {true, "TX_PACKET_TC_OUT_CNT_0_H"}, - {true, "TX_PACKET_TC_OUT_CNT_1_L"}, - {true, "TX_PACKET_TC_OUT_CNT_1_H"}, - {true, "TX_PACKET_TC_OUT_CNT_2_L"}, - {true, "TX_PACKET_TC_OUT_CNT_2_H"}, - - {true, "TX_PACKET_TC_OUT_CNT_3_L"}, - {true, "TX_PACKET_TC_OUT_CNT_3_H"}, - {true, "TX_PACKET_TC_OUT_CNT_4_L"}, - {true, "TX_PACKET_TC_OUT_CNT_4_H"}, - {true, "TX_PACKET_TC_OUT_CNT_5_L"}, - {true, "TX_PACKET_TC_OUT_CNT_5_H"}, - - {true, "TX_PACKET_TC_OUT_CNT_6_L"}, - {true, "TX_PACKET_TC_OUT_CNT_6_H"}, - {true, "TX_PACKET_TC_OUT_CNT_7_L"}, - {true, "TX_PACKET_TC_OUT_CNT_7_H"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = { - {true, "OQ_INDEX"}, - {true, "QUEUE_CNT"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = { - {true, "prt_id"}, - {true, "IGU_RX_ERR_PKT"}, - {true, "IGU_RX_NO_SOF_PKT"}, - {true, "EGU_TX_1588_SHORT_PKT"}, - {true, "EGU_TX_1588_PKT"}, - {true, "EGU_TX_ERR_PKT"}, - - {true, "IGU_RX_OUT_L2_PKT"}, - {true, "IGU_RX_OUT_L3_PKT"}, - {true, "IGU_RX_OUT_L4_PKT"}, - {true, "IGU_RX_IN_L2_PKT"}, - {true, "IGU_RX_IN_L3_PKT"}, - {true, "IGU_RX_IN_L4_PKT"}, - - {true, "IGU_RX_EL3E_PKT"}, - {true, "IGU_RX_EL4E_PKT"}, - {true, "IGU_RX_L3E_PKT"}, - {true, "IGU_RX_L4E_PKT"}, - {true, "IGU_RX_ROCEE_PKT"}, - {true, "IGU_RX_OUT_UDP0_PKT"}, - - {true, "IGU_RX_IN_UDP0_PKT"}, - {true, "IGU_MC_CAR_DROP_PKT_L"}, - {true, "IGU_MC_CAR_DROP_PKT_H"}, - {true, "IGU_BC_CAR_DROP_PKT_L"}, - {true, "IGU_BC_CAR_DROP_PKT_H"}, - {false, "Reserved"}, - - {true, "IGU_RX_OVERSIZE_PKT_L"}, - {true, "IGU_RX_OVERSIZE_PKT_H"}, - {true, "IGU_RX_UNDERSIZE_PKT_L"}, - {true, "IGU_RX_UNDERSIZE_PKT_H"}, - {true, "IGU_RX_OUT_ALL_PKT_L"}, - {true, "IGU_RX_OUT_ALL_PKT_H"}, - - {true, "IGU_TX_OUT_ALL_PKT_L"}, - {true, "IGU_TX_OUT_ALL_PKT_H"}, - {true, "IGU_RX_UNI_PKT_L"}, - {true, "IGU_RX_UNI_PKT_H"}, - {true, "IGU_RX_MULTI_PKT_L"}, - {true, "IGU_RX_MULTI_PKT_H"}, - - {true, "IGU_RX_BROAD_PKT_L"}, - {true, "IGU_RX_BROAD_PKT_H"}, - {true, "EGU_TX_OUT_ALL_PKT_L"}, - {true, "EGU_TX_OUT_ALL_PKT_H"}, - {true, "EGU_TX_UNI_PKT_L"}, - {true, "EGU_TX_UNI_PKT_H"}, - - {true, "EGU_TX_MULTI_PKT_L"}, - {true, "EGU_TX_MULTI_PKT_H"}, - {true, "EGU_TX_BROAD_PKT_L"}, - {true, "EGU_TX_BROAD_PKT_H"}, - {true, "IGU_TX_KEY_NUM_L"}, - {true, "IGU_TX_KEY_NUM_H"}, - - {true, "IGU_RX_NON_TUN_PKT_L"}, - {true, "IGU_RX_NON_TUN_PKT_H"}, - {true, "IGU_RX_TUN_PKT_L"}, - {true, "IGU_RX_TUN_PKT_H"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = { - {true, "tc_queue_num"}, - {true, "FSM_DFX_ST0"}, - {true, "FSM_DFX_ST1"}, - {true, "RPU_RX_PKT_DROP_CNT"}, - {true, "BUF_WAIT_TIMEOUT"}, - {true, "BUF_WAIT_TIMEOUT_QID"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = { - {false, "Reserved"}, - {true, "FIFO_DFX_ST0"}, - {true, "FIFO_DFX_ST1"}, - {true, "FIFO_DFX_ST2"}, - {true, "FIFO_DFX_ST3"}, - {true, "FIFO_DFX_ST4"}, - - {true, "FIFO_DFX_ST5"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = { - {false, "Reserved"}, - {true, "NCSI_EGU_TX_FIFO_STS"}, - {true, 
"NCSI_PAUSE_STATUS"}, - {true, "NCSI_RX_CTRL_DMAC_ERR_CNT"}, - {true, "NCSI_RX_CTRL_SMAC_ERR_CNT"}, - {true, "NCSI_RX_CTRL_CKS_ERR_CNT"}, - - {true, "NCSI_RX_CTRL_PKT_CNT"}, - {true, "NCSI_RX_PT_DMAC_ERR_CNT"}, - {true, "NCSI_RX_PT_SMAC_ERR_CNT"}, - {true, "NCSI_RX_PT_PKT_CNT"}, - {true, "NCSI_RX_FCS_ERR_CNT"}, - {true, "NCSI_TX_CTRL_DMAC_ERR_CNT"}, - - {true, "NCSI_TX_CTRL_SMAC_ERR_CNT"}, - {true, "NCSI_TX_CTRL_PKT_CNT"}, - {true, "NCSI_TX_PT_DMAC_ERR_CNT"}, - {true, "NCSI_TX_PT_SMAC_ERR_CNT"}, - {true, "NCSI_TX_PT_PKT_CNT"}, - {true, "NCSI_TX_PT_PKT_TRUNC_CNT"}, - - {true, "NCSI_TX_PT_PKT_ERR_CNT"}, - {true, "NCSI_TX_CTRL_PKT_ERR_CNT"}, - {true, "NCSI_RX_CTRL_PKT_TRUNC_CNT"}, - {true, "NCSI_RX_CTRL_PKT_CFLIT_CNT"}, - {false, "Reserved"}, - {false, "Reserved"}, - - {true, "NCSI_MAC_RX_OCTETS_OK"}, - {true, "NCSI_MAC_RX_OCTETS_BAD"}, - {true, "NCSI_MAC_RX_UC_PKTS"}, - {true, "NCSI_MAC_RX_MC_PKTS"}, - {true, "NCSI_MAC_RX_BC_PKTS"}, - {true, "NCSI_MAC_RX_PKTS_64OCTETS"}, - - {true, "NCSI_MAC_RX_PKTS_65TO127OCTETS"}, - {true, "NCSI_MAC_RX_PKTS_128TO255OCTETS"}, - {true, "NCSI_MAC_RX_PKTS_255TO511OCTETS"}, - {true, "NCSI_MAC_RX_PKTS_512TO1023OCTETS"}, - {true, "NCSI_MAC_RX_PKTS_1024TO1518OCTETS"}, - {true, "NCSI_MAC_RX_PKTS_1519TOMAXOCTETS"}, - - {true, "NCSI_MAC_RX_FCS_ERRORS"}, - {true, "NCSI_MAC_RX_LONG_ERRORS"}, - {true, "NCSI_MAC_RX_JABBER_ERRORS"}, - {true, "NCSI_MAC_RX_RUNT_ERR_CNT"}, - {true, "NCSI_MAC_RX_SHORT_ERR_CNT"}, - {true, "NCSI_MAC_RX_FILT_PKT_CNT"}, - - {true, "NCSI_MAC_RX_OCTETS_TOTAL_FILT"}, - {true, "NCSI_MAC_TX_OCTETS_OK"}, - {true, "NCSI_MAC_TX_OCTETS_BAD"}, - {true, "NCSI_MAC_TX_UC_PKTS"}, - {true, "NCSI_MAC_TX_MC_PKTS"}, - {true, "NCSI_MAC_TX_BC_PKTS"}, - - {true, "NCSI_MAC_TX_PKTS_64OCTETS"}, - {true, "NCSI_MAC_TX_PKTS_65TO127OCTETS"}, - {true, "NCSI_MAC_TX_PKTS_128TO255OCTETS"}, - {true, "NCSI_MAC_TX_PKTS_256TO511OCTETS"}, - {true, "NCSI_MAC_TX_PKTS_512TO1023OCTETS"}, - {true, "NCSI_MAC_TX_PKTS_1024TO1518OCTETS"}, - - {true, "NCSI_MAC_TX_PKTS_1519TOMAXOCTETS"}, - {true, "NCSI_MAC_TX_UNDERRUN"}, - {true, "NCSI_MAC_TX_CRC_ERROR"}, - {true, "NCSI_MAC_TX_PAUSE_FRAMES"}, - {true, "NCSI_MAC_RX_PAD_PKTS"}, - {true, "NCSI_MAC_RX_PAUSE_FRAMES"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = { - {false, "Reserved"}, - {true, "LGE_IGU_AFIFO_DFX_0"}, - {true, "LGE_IGU_AFIFO_DFX_1"}, - {true, "LGE_IGU_AFIFO_DFX_2"}, - {true, "LGE_IGU_AFIFO_DFX_3"}, - {true, "LGE_IGU_AFIFO_DFX_4"}, - - {true, "LGE_IGU_AFIFO_DFX_5"}, - {true, "LGE_IGU_AFIFO_DFX_6"}, - {true, "LGE_IGU_AFIFO_DFX_7"}, - {true, "LGE_EGU_AFIFO_DFX_0"}, - {true, "LGE_EGU_AFIFO_DFX_1"}, - {true, "LGE_EGU_AFIFO_DFX_2"}, - - {true, "LGE_EGU_AFIFO_DFX_3"}, - {true, "LGE_EGU_AFIFO_DFX_4"}, - {true, "LGE_EGU_AFIFO_DFX_5"}, - {true, "LGE_EGU_AFIFO_DFX_6"}, - {true, "LGE_EGU_AFIFO_DFX_7"}, - {true, "CGE_IGU_AFIFO_DFX_0"}, - - {true, "CGE_IGU_AFIFO_DFX_1"}, - {true, "CGE_EGU_AFIFO_DFX_0"}, - {true, "CGE_EGU_AFIFO_DFX_1"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = { - {false, "Reserved"}, - {true, "DROP_FROM_PRT_PKT_CNT"}, - {true, "DROP_FROM_HOST_PKT_CNT"}, - {true, "DROP_TX_VLAN_PROC_CNT"}, - {true, "DROP_MNG_CNT"}, - {true, "DROP_FD_CNT"}, - - {true, "DROP_NO_DST_CNT"}, - {true, "DROP_MC_MBID_FULL_CNT"}, - {true, "DROP_SC_FILTERED"}, - {true, "PPP_MC_DROP_PKT_CNT"}, - {true, "DROP_PT_CNT"}, - {true, "DROP_MAC_ANTI_SPOOF_CNT"}, - - {true, "DROP_IG_VFV_CNT"}, - {true, "DROP_IG_PRTV_CNT"}, - {true, 
"DROP_CNM_PFC_PAUSE_CNT"}, - {true, "DROP_TORUS_TC_CNT"}, - {true, "DROP_TORUS_LPBK_CNT"}, - {true, "PPP_HFS_STS"}, - - {true, "PPP_MC_RSLT_STS"}, - {true, "PPP_P3U_STS"}, - {true, "PPP_RSLT_DESCR_STS"}, - {true, "PPP_UMV_STS_0"}, - {true, "PPP_UMV_STS_1"}, - {true, "PPP_VFV_STS"}, - - {true, "PPP_GRO_KEY_CNT"}, - {true, "PPP_GRO_INFO_CNT"}, - {true, "PPP_GRO_DROP_CNT"}, - {true, "PPP_GRO_OUT_CNT"}, - {true, "PPP_GRO_KEY_MATCH_DATA_CNT"}, - {true, "PPP_GRO_KEY_MATCH_TCAM_CNT"}, - - {true, "PPP_GRO_INFO_MATCH_CNT"}, - {true, "PPP_GRO_FREE_ENTRY_CNT"}, - {true, "PPP_GRO_INNER_DFX_SIGNAL"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - - {true, "GET_RX_PKT_CNT_L"}, - {true, "GET_RX_PKT_CNT_H"}, - {true, "GET_TX_PKT_CNT_L"}, - {true, "GET_TX_PKT_CNT_H"}, - {true, "SEND_UC_PRT2HOST_PKT_CNT_L"}, - {true, "SEND_UC_PRT2HOST_PKT_CNT_H"}, - - {true, "SEND_UC_PRT2PRT_PKT_CNT_L"}, - {true, "SEND_UC_PRT2PRT_PKT_CNT_H"}, - {true, "SEND_UC_HOST2HOST_PKT_CNT_L"}, - {true, "SEND_UC_HOST2HOST_PKT_CNT_H"}, - {true, "SEND_UC_HOST2PRT_PKT_CNT_L"}, - {true, "SEND_UC_HOST2PRT_PKT_CNT_H"}, - - {true, "SEND_MC_FROM_PRT_CNT_L"}, - {true, "SEND_MC_FROM_PRT_CNT_H"}, - {true, "SEND_MC_FROM_HOST_CNT_L"}, - {true, "SEND_MC_FROM_HOST_CNT_H"}, - {true, "SSU_MC_RD_CNT_L"}, - {true, "SSU_MC_RD_CNT_H"}, - - {true, "SSU_MC_DROP_CNT_L"}, - {true, "SSU_MC_DROP_CNT_H"}, - {true, "SSU_MC_RD_PKT_CNT_L"}, - {true, "SSU_MC_RD_PKT_CNT_H"}, - {true, "PPP_MC_2HOST_PKT_CNT_L"}, - {true, "PPP_MC_2HOST_PKT_CNT_H"}, - - {true, "PPP_MC_2PRT_PKT_CNT_L"}, - {true, "PPP_MC_2PRT_PKT_CNT_H"}, - {true, "NTSNOS_PKT_CNT_L"}, - {true, "NTSNOS_PKT_CNT_H"}, - {true, "NTUP_PKT_CNT_L"}, - {true, "NTUP_PKT_CNT_H"}, - - {true, "NTLCL_PKT_CNT_L"}, - {true, "NTLCL_PKT_CNT_H"}, - {true, "NTTGT_PKT_CNT_L"}, - {true, "NTTGT_PKT_CNT_H"}, - {true, "RTNS_PKT_CNT_L"}, - {true, "RTNS_PKT_CNT_H"}, - - {true, "RTLPBK_PKT_CNT_L"}, - {true, "RTLPBK_PKT_CNT_H"}, - {true, "NR_PKT_CNT_L"}, - {true, "NR_PKT_CNT_H"}, - {true, "RR_PKT_CNT_L"}, - {true, "RR_PKT_CNT_H"}, - - {true, "MNG_TBL_HIT_CNT_L"}, - {true, "MNG_TBL_HIT_CNT_H"}, - {true, "FD_TBL_HIT_CNT_L"}, - {true, "FD_TBL_HIT_CNT_H"}, - {true, "FD_LKUP_CNT_L"}, - {true, "FD_LKUP_CNT_H"}, - - {true, "BC_HIT_CNT_L"}, - {true, "BC_HIT_CNT_H"}, - {true, "UM_TBL_UC_HIT_CNT_L"}, - {true, "UM_TBL_UC_HIT_CNT_H"}, - {true, "UM_TBL_MC_HIT_CNT_L"}, - {true, "UM_TBL_MC_HIT_CNT_H"}, - - {true, "UM_TBL_VMDQ1_HIT_CNT_L"}, - {true, "UM_TBL_VMDQ1_HIT_CNT_H"}, - {true, "MTA_TBL_HIT_CNT_L"}, - {true, "MTA_TBL_HIT_CNT_H"}, - {true, "FWD_BONDING_HIT_CNT_L"}, - {true, "FWD_BONDING_HIT_CNT_H"}, - - {true, "PROMIS_TBL_HIT_CNT_L"}, - {true, "PROMIS_TBL_HIT_CNT_H"}, - {true, "GET_TUNL_PKT_CNT_L"}, - {true, "GET_TUNL_PKT_CNT_H"}, - {true, "GET_BMC_PKT_CNT_L"}, - {true, "GET_BMC_PKT_CNT_H"}, - - {true, "SEND_UC_PRT2BMC_PKT_CNT_L"}, - {true, "SEND_UC_PRT2BMC_PKT_CNT_H"}, - {true, "SEND_UC_HOST2BMC_PKT_CNT_L"}, - {true, "SEND_UC_HOST2BMC_PKT_CNT_H"}, - {true, "SEND_UC_BMC2HOST_PKT_CNT_L"}, - {true, "SEND_UC_BMC2HOST_PKT_CNT_H"}, - - {true, "SEND_UC_BMC2PRT_PKT_CNT_L"}, - {true, "SEND_UC_BMC2PRT_PKT_CNT_H"}, - {true, "PPP_MC_2BMC_PKT_CNT_L"}, - {true, "PPP_MC_2BMC_PKT_CNT_H"}, - {true, "VLAN_MIRR_CNT_L"}, - {true, "VLAN_MIRR_CNT_H"}, - - {true, "IG_MIRR_CNT_L"}, - {true, "IG_MIRR_CNT_H"}, - {true, "EG_MIRR_CNT_L"}, - {true, "EG_MIRR_CNT_H"}, - {true, "RX_DEFAULT_HOST_HIT_CNT_L"}, - {true, "RX_DEFAULT_HOST_HIT_CNT_H"}, - - {true, "LAN_PAIR_CNT_L"}, - {true, "LAN_PAIR_CNT_H"}, - {true, "UM_TBL_MC_HIT_PKT_CNT_L"}, - {true, 
"UM_TBL_MC_HIT_PKT_CNT_H"}, - {true, "MTA_TBL_HIT_PKT_CNT_L"}, - {true, "MTA_TBL_HIT_PKT_CNT_H"}, - - {true, "PROMIS_TBL_HIT_PKT_CNT_L"}, - {true, "PROMIS_TBL_HIT_PKT_CNT_H"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = { - {false, "Reserved"}, - {true, "FSM_DFX_ST0"}, - {true, "FSM_DFX_ST1"}, - {true, "FSM_DFX_ST2"}, - {true, "FIFO_DFX_ST0"}, - {true, "FIFO_DFX_ST1"}, - - {true, "FIFO_DFX_ST2"}, - {true, "FIFO_DFX_ST3"}, - {true, "FIFO_DFX_ST4"}, - {true, "FIFO_DFX_ST5"}, - {true, "FIFO_DFX_ST6"}, - {true, "FIFO_DFX_ST7"}, - - {true, "FIFO_DFX_ST8"}, - {true, "FIFO_DFX_ST9"}, - {true, "FIFO_DFX_ST10"}, - {true, "FIFO_DFX_ST11"}, - {true, "Q_CREDIT_VLD_0"}, - {true, "Q_CREDIT_VLD_1"}, - - {true, "Q_CREDIT_VLD_2"}, - {true, "Q_CREDIT_VLD_3"}, - {true, "Q_CREDIT_VLD_4"}, - {true, "Q_CREDIT_VLD_5"}, - {true, "Q_CREDIT_VLD_6"}, - {true, "Q_CREDIT_VLD_7"}, - - {true, "Q_CREDIT_VLD_8"}, - {true, "Q_CREDIT_VLD_9"}, - {true, "Q_CREDIT_VLD_10"}, - {true, "Q_CREDIT_VLD_11"}, - {true, "Q_CREDIT_VLD_12"}, - {true, "Q_CREDIT_VLD_13"}, - - {true, "Q_CREDIT_VLD_14"}, - {true, "Q_CREDIT_VLD_15"}, - {true, "Q_CREDIT_VLD_16"}, - {true, "Q_CREDIT_VLD_17"}, - {true, "Q_CREDIT_VLD_18"}, - {true, "Q_CREDIT_VLD_19"}, - - {true, "Q_CREDIT_VLD_20"}, - {true, "Q_CREDIT_VLD_21"}, - {true, "Q_CREDIT_VLD_22"}, - {true, "Q_CREDIT_VLD_23"}, - {true, "Q_CREDIT_VLD_24"}, - {true, "Q_CREDIT_VLD_25"}, - - {true, "Q_CREDIT_VLD_26"}, - {true, "Q_CREDIT_VLD_27"}, - {true, "Q_CREDIT_VLD_28"}, - {true, "Q_CREDIT_VLD_29"}, - {true, "Q_CREDIT_VLD_30"}, - {true, "Q_CREDIT_VLD_31"}, - - {true, "GRO_BD_SERR_CNT"}, - {true, "GRO_CONTEXT_SERR_CNT"}, - {true, "RX_STASH_CFG_SERR_CNT"}, - {true, "AXI_RD_FBD_SERR_CNT"}, - {true, "GRO_BD_MERR_CNT"}, - {true, "GRO_CONTEXT_MERR_CNT"}, - - {true, "RX_STASH_CFG_MERR_CNT"}, - {true, "AXI_RD_FBD_MERR_CNT"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, - {false, "Reserved"}, -}; - -static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = { - {true, "q_num"}, - {true, "RCB_CFG_RX_RING_TAIL"}, - {true, "RCB_CFG_RX_RING_HEAD"}, - {true, "RCB_CFG_RX_RING_FBDNUM"}, - {true, "RCB_CFG_RX_RING_OFFSET"}, - {true, "RCB_CFG_RX_RING_FBDOFFSET"}, - - {true, "RCB_CFG_RX_RING_PKTNUM_RECORD"}, - {true, "RCB_CFG_TX_RING_TAIL"}, - {true, "RCB_CFG_TX_RING_HEAD"}, - {true, "RCB_CFG_TX_RING_FBDNUM"}, - {true, "RCB_CFG_TX_RING_OFFSET"}, - {true, "RCB_CFG_TX_RING_EBDNUM"}, -}; - #define HCLGE_DBG_INFO_LEN 256 #define HCLGE_DBG_VLAN_FLTR_INFO_LEN 256 #define HCLGE_DBG_VLAN_OFFLOAD_INFO_LEN 512 -- cgit v1.2.3 From 8a4bda8cb9e43e1fae96c4c4aa94069f49dc3a68 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Wed, 10 Apr 2024 20:53:53 +0800 Subject: net: hns3: dump more reg info based on ras mod When the driver received an interrupte for hardware error, it will try to restore by resetting. But the hardware registers will also be reset at this case, which make it hard to analysis why the hardware error occurs. This patch dumps these registers before resetting to help analyze the hardware error occurs. 
Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240410125354.2177067-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 4 + .../hisilicon/hns3/hns3_common/hclge_comm_cmd.c | 1 + .../hisilicon/hns3/hns3_common/hclge_comm_cmd.h | 2 + .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 6 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h | 3 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 433 ++++++++++++++++++++- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 36 ++ 7 files changed, 477 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index f19f1e1d1f9f..e9266c65b331 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -104,6 +104,7 @@ enum HNAE3_DEV_CAP_BITS { HNAE3_DEV_SUPPORT_WOL_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B, HNAE3_DEV_SUPPORT_VF_FAULT_B, + HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B, }; #define hnae3_ae_dev_fd_supported(ae_dev) \ @@ -181,6 +182,9 @@ enum HNAE3_DEV_CAP_BITS { #define hnae3_ae_dev_vf_fault_supported(ae_dev) \ test_bit(HNAE3_DEV_SUPPORT_VF_FAULT_B, (ae_dev)->caps) +#define hnae3_ae_dev_gen_reg_dfx_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B, (hdev)->ae_dev->caps) + enum HNAE3_PF_CAP_BITS { HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c index 11df6fbd641d..ea40b594dbac 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c @@ -158,6 +158,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = { {HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B}, {HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B}, {HCLGE_COMM_CAP_VF_FAULT_B, HNAE3_DEV_SUPPORT_VF_FAULT_B}, + {HCLGE_COMM_CAP_ERR_MOD_GEN_REG_B, HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B}, }; static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h index e6a087576df6..a2bc5a9adaa3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -91,6 +91,7 @@ enum hclge_opcode_type { HCLGE_OPC_DFX_RCB_REG = 0x004D, HCLGE_OPC_DFX_TQP_REG = 0x004E, HCLGE_OPC_DFX_SSU_REG_2 = 0x004F, + HCLGE_OPC_DFX_GEN_REG = 0x7038, HCLGE_OPC_QUERY_DEV_SPECS = 0x0050, HCLGE_OPC_GET_QUEUE_ERR_VF = 0x0067, @@ -353,6 +354,7 @@ enum HCLGE_COMM_CAP_BITS { HCLGE_COMM_CAP_LANE_NUM_B = 27, HCLGE_COMM_CAP_WOL_B = 28, HCLGE_COMM_CAP_TM_FLUSH_B = 31, + HCLGE_COMM_CAP_ERR_MOD_GEN_REG_B = 32, }; enum HCLGE_COMM_API_CAP_BITS { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 243be19a7557..debf143e9940 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -801,10 +801,8 @@ static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset, return 0; } -static int hclge_dbg_cmd_send(struct hclge_dev *hdev, - struct hclge_desc *desc_src, - int index, int bd_num, - enum hclge_opcode_type 
cmd) +int hclge_dbg_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc_src, + int index, int bd_num, enum hclge_opcode_type cmd) { struct hclge_desc *desc = desc_src; int ret, i; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h index 31a775fb032b..2b998cbed826 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h @@ -131,4 +131,7 @@ struct hclge_dbg_vlan_cfg { u8 pri_only2; }; +int hclge_dbg_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc_src, + int index, int bd_num, enum hclge_opcode_type cmd); + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index d63e114f93d0..e132c2f09560 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1198,6 +1198,425 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { } }; +static const struct hclge_mod_reg_info hclge_ssu_reg_0_info[] = { + { + .reg_name = "SSU_BP_STATUS_0~5", + .reg_offset_group = { 5, 6, 7, 8, 9, 10}, + .group_size = 6 + }, { + .reg_name = "LO_PRI_UNICAST_CUR_CNT", + .reg_offset_group = {54}, + .group_size = 1 + }, { + .reg_name = "HI/LO_PRI_MULTICAST_CUR_CNT", + .reg_offset_group = {55, 56}, + .group_size = 2 + }, { + .reg_name = "SSU_MB_RD_RLT_DROP_CNT", + .reg_offset_group = {29}, + .group_size = 1 + }, { + .reg_name = "SSU_PPP_MAC_KEY_NUM", + .reg_offset_group = {31, 30}, + .group_size = 2 + }, { + .reg_name = "SSU_PPP_HOST_KEY_NUM", + .reg_offset_group = {33, 32}, + .group_size = 2 + }, { + .reg_name = "PPP_SSU_MAC/HOST_RLT_NUM", + .reg_offset_group = {35, 34, 37, 36}, + .group_size = 4 + }, { + .reg_name = "FULL/PART_DROP_NUM", + .reg_offset_group = {18, 19}, + .group_size = 2 + }, { + .reg_name = "PPP_KEY/RLT_DROP_NUM", + .reg_offset_group = {20, 21}, + .group_size = 2 + }, { + .reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT", + .reg_offset_group = {48, 49}, + .group_size = 2 + }, { + .reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT_RX", + .reg_offset_group = {50, 51}, + .group_size = 2 + }, +}; + +static const struct hclge_mod_reg_info hclge_ssu_reg_1_info[] = { + { + .reg_name = "RX_PACKET_IN/OUT_CNT", + .reg_offset_group = {13, 12, 15, 14}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_IN/OUT_CNT", + .reg_offset_group = {17, 16, 19, 18}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC0_IN/OUT_CNT", + .reg_offset_group = {25, 24, 41, 40}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC1_IN/OUT_CNT", + .reg_offset_group = {27, 26, 43, 42}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC2_IN/OUT_CNT", + .reg_offset_group = {29, 28, 45, 44}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC3_IN/OUT_CNT", + .reg_offset_group = {31, 30, 47, 46}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC4_IN/OUT_CNT", + .reg_offset_group = {33, 32, 49, 48}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC5_IN/OUT_CNT", + .reg_offset_group = {35, 34, 51, 50}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC6_IN/OUT_CNT", + .reg_offset_group = {37, 36, 53, 52}, + .group_size = 4 + }, { + .reg_name = "RX_PACKET_TC7_IN/OUT_CNT", + .reg_offset_group = {39, 38, 55, 54}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC0_IN/OUT_CNT", + .reg_offset_group = {57, 56, 73, 72}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC1_IN/OUT_CNT", + .reg_offset_group = {59, 58, 75, 
74}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC2_IN/OUT_CNT", + .reg_offset_group = {61, 60, 77, 76}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC3_IN/OUT_CNT", + .reg_offset_group = {63, 62, 79, 78}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC4_IN/OUT_CNT", + .reg_offset_group = {65, 64, 81, 80}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC5_IN/OUT_CNT", + .reg_offset_group = {67, 66, 83, 82}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC6_IN/OUT_CNT", + .reg_offset_group = {69, 68, 85, 84}, + .group_size = 4 + }, { + .reg_name = "TX_PACKET_TC7_IN/OUT_CNT", + .reg_offset_group = {71, 70, 87, 86}, + .group_size = 4 + }, { + .reg_name = "PACKET_TC0~3_CURR_BUFFER_CNT", + .reg_offset_group = {1, 2, 3, 4}, + .group_size = 4 + }, { + .reg_name = "PACKET_TC4~7_CURR_BUFFER_CNT", + .reg_offset_group = {5, 6, 7, 8}, + .group_size = 4 + }, { + .reg_name = "ROC_RX_PACKET_IN_CNT", + .reg_offset_group = {21, 20}, + .group_size = 2 + }, { + .reg_name = "ROC_TX_PACKET_OUT_CNT", + .reg_offset_group = {23, 22}, + .group_size = 2 + } +}; + +static const struct hclge_mod_reg_info hclge_rpu_reg_0_info[] = { + { + .reg_name = "RPU_FSM_DFX_ST0/ST1_TNL", + .has_suffix = true, + .reg_offset_group = {1, 2}, + .group_size = 2 + }, { + .reg_name = "RPU_RX_PKT_DROP_CNT_TNL", + .has_suffix = true, + .reg_offset_group = {3}, + .group_size = 1 + } +}; + +static const struct hclge_mod_reg_info hclge_rpu_reg_1_info[] = { + { + .reg_name = "FIFO_DFX_ST0_1_2_4", + .reg_offset_group = {1, 2, 3, 5}, + .group_size = 4 + } +}; + +static const struct hclge_mod_reg_info hclge_igu_egu_reg_info[] = { + { + .reg_name = "IGU_RX_ERR_PKT", + .reg_offset_group = {1}, + .group_size = 1 + }, { + .reg_name = "IGU_RX_OUT_ALL_PKT", + .reg_offset_group = {29, 28}, + .group_size = 2 + }, { + .reg_name = "EGU_TX_OUT_ALL_PKT", + .reg_offset_group = {39, 38}, + .group_size = 2 + }, { + .reg_name = "EGU_TX_ERR_PKT", + .reg_offset_group = {5}, + .group_size = 1 + } +}; + +static const struct hclge_mod_reg_info hclge_gen_reg_info_tnl[] = { + { + .reg_name = "SSU2RPU_TNL_WR_PKT_CNT_TNL", + .has_suffix = true, + .reg_offset_group = {1}, + .group_size = 1 + }, { + .reg_name = "RPU2HST_TNL_WR_PKT_CNT_TNL", + .has_suffix = true, + .reg_offset_group = {12}, + .group_size = 1 + } +}; + +static const struct hclge_mod_reg_info hclge_gen_reg_info[] = { + { + .reg_name = "SSU_OVERSIZE_DROP_CNT", + .reg_offset_group = {12}, + .group_size = 1 + }, { + .reg_name = "ROCE_RX_BYPASS_5NS_DROP_NUM", + .reg_offset_group = {13}, + .group_size = 1 + }, { + .reg_name = "RX_PKT_IN/OUT_ERR_CNT", + .reg_offset_group = {15, 14, 19, 18}, + .group_size = 4 + }, { + .reg_name = "TX_PKT_IN/OUT_ERR_CNT", + .reg_offset_group = {17, 16, 21, 20}, + .group_size = 4 + }, { + .reg_name = "ETS_TC_READY", + .reg_offset_group = {22}, + .group_size = 1 + }, { + .reg_name = "MIB_TX/RX_BAD_PKTS", + .reg_offset_group = {19, 18, 29, 28}, + .group_size = 4 + }, { + .reg_name = "MIB_TX/RX_GOOD_PKTS", + .reg_offset_group = {21, 20, 31, 30}, + .group_size = 4 + }, { + .reg_name = "MIB_TX/RX_TOTAL_PKTS", + .reg_offset_group = {23, 22, 33, 32}, + .group_size = 4 + }, { + .reg_name = "MIB_TX/RX_PAUSE_PKTS", + .reg_offset_group = {25, 24, 35, 34}, + .group_size = 4 + }, { + .reg_name = "MIB_TX_ERR_ALL_PKTS", + .reg_offset_group = {27, 26}, + .group_size = 2 + }, { + .reg_name = "MIB_RX_FCS_ERR_PKTS", + .reg_offset_group = {37, 36}, + .group_size = 2 + }, { + .reg_name = "IGU_EGU_AUTO_GATE_EN", + .reg_offset_group = {42}, + .group_size = 1 + }, { + .reg_name = 
"IGU_EGU_INT_SRC", + .reg_offset_group = {43}, + .group_size = 1 + }, { + .reg_name = "EGU_READY_NUM_CFG", + .reg_offset_group = {44}, + .group_size = 1 + }, { + .reg_name = "IGU_EGU_TNL_DFX", + .reg_offset_group = {45}, + .group_size = 1 + }, { + .reg_name = "TX_TNL_NOTE_PKT", + .reg_offset_group = {46}, + .group_size = 1 + } +}; + +static const struct hclge_mod_reg_common_msg hclge_ssu_reg_common_msg[] = { + { + .cmd = HCLGE_OPC_DFX_SSU_REG_0, + .result_regs = hclge_ssu_reg_0_info, + .bd_num = HCLGE_BD_NUM_SSU_REG_0, + .result_regs_size = ARRAY_SIZE(hclge_ssu_reg_0_info) + }, { + .cmd = HCLGE_OPC_DFX_SSU_REG_1, + .result_regs = hclge_ssu_reg_1_info, + .bd_num = HCLGE_BD_NUM_SSU_REG_1, + .result_regs_size = ARRAY_SIZE(hclge_ssu_reg_1_info) + }, { + .cmd = HCLGE_OPC_DFX_RPU_REG_0, + .result_regs = hclge_rpu_reg_0_info, + .bd_num = HCLGE_BD_NUM_RPU_REG_0, + .result_regs_size = ARRAY_SIZE(hclge_rpu_reg_0_info), + .need_para = true + }, { + .cmd = HCLGE_OPC_DFX_RPU_REG_1, + .result_regs = hclge_rpu_reg_1_info, + .bd_num = HCLGE_BD_NUM_RPU_REG_1, + .result_regs_size = ARRAY_SIZE(hclge_rpu_reg_1_info) + }, { + .cmd = HCLGE_OPC_DFX_IGU_EGU_REG, + .result_regs = hclge_igu_egu_reg_info, + .bd_num = HCLGE_BD_NUM_IGU_EGU_REG, + .result_regs_size = ARRAY_SIZE(hclge_igu_egu_reg_info) + }, { + .cmd = HCLGE_OPC_DFX_GEN_REG, + .result_regs = hclge_gen_reg_info_tnl, + .bd_num = HCLGE_BD_NUM_GEN_REG, + .result_regs_size = ARRAY_SIZE(hclge_gen_reg_info_tnl), + .need_para = true + }, { + .cmd = HCLGE_OPC_DFX_GEN_REG, + .result_regs = hclge_gen_reg_info, + .bd_num = HCLGE_BD_NUM_GEN_REG, + .result_regs_size = ARRAY_SIZE(hclge_gen_reg_info) + } +}; + +static int +hclge_print_mod_reg_info(struct device *dev, struct hclge_desc *desc, + const struct hclge_mod_reg_info *reg_info, int size) +{ + int i, j, pos, actual_len; + u8 offset, bd_idx, index; + char *buf; + + buf = kzalloc(HCLGE_MOD_REG_INFO_LEN_MAX, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; i < size; i++) { + actual_len = strlen(reg_info[i].reg_name) + + HCLGE_MOD_REG_EXTRA_LEN + + HCLGE_MOD_REG_VALUE_LEN * reg_info[i].group_size; + if (actual_len > HCLGE_MOD_REG_INFO_LEN_MAX) { + dev_info(dev, "length of reg(%s) is invalid, len=%d\n", + reg_info[i].reg_name, actual_len); + continue; + } + + pos = scnprintf(buf, HCLGE_MOD_REG_INFO_LEN_MAX, "%s", + reg_info[i].reg_name); + if (reg_info[i].has_suffix) + pos += scnprintf(buf + pos, + HCLGE_MOD_REG_INFO_LEN_MAX - pos, "%u", + le32_to_cpu(desc->data[0])); + pos += scnprintf(buf + pos, + HCLGE_MOD_REG_INFO_LEN_MAX - pos, + ":"); + for (j = 0; j < reg_info[i].group_size; j++) { + offset = reg_info[i].reg_offset_group[j]; + index = offset % HCLGE_DESC_DATA_LEN; + bd_idx = offset / HCLGE_DESC_DATA_LEN; + pos += scnprintf(buf + pos, + HCLGE_MOD_REG_INFO_LEN_MAX - pos, + " %08x", + le32_to_cpu(desc[bd_idx].data[index])); + } + dev_info(dev, "%s\n", buf); + } + + kfree(buf); + return 0; +} + +static bool hclge_err_mod_check_support_cmd(enum hclge_opcode_type opcode, + struct hclge_dev *hdev) +{ + if (opcode == HCLGE_OPC_DFX_GEN_REG && + !hnae3_ae_dev_gen_reg_dfx_supported(hdev)) + return false; + return true; +} + +/* For each common msg, send cmdq to IMP and print result reg info. + * If there is a parameter, loop it and request. 
+ */ +static void +hclge_query_reg_info(struct hclge_dev *hdev, + struct hclge_mod_reg_common_msg *msg, u32 loop_time, + u32 *loop_para) +{ + int desc_len, i, ret; + + desc_len = msg->bd_num * sizeof(struct hclge_desc); + msg->desc = kzalloc(desc_len, GFP_KERNEL); + if (!msg->desc) { + dev_err(&hdev->pdev->dev, "failed to query reg info, ret=%d", + -ENOMEM); + return; + } + + for (i = 0; i < loop_time; i++) { + ret = hclge_dbg_cmd_send(hdev, msg->desc, *loop_para, + msg->bd_num, msg->cmd); + loop_para++; + if (ret) + continue; + ret = hclge_print_mod_reg_info(&hdev->pdev->dev, msg->desc, + msg->result_regs, + msg->result_regs_size); + if (ret) + dev_err(&hdev->pdev->dev, "failed to print mod reg info, ret=%d\n", + ret); + } + + kfree(msg->desc); +} + +static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) +{ + u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; + struct hclge_mod_reg_common_msg msg; + u8 i, j, num; + u32 loop_time; + + num = ARRAY_SIZE(hclge_ssu_reg_common_msg); + for (i = 0; i < num; i++) { + msg = hclge_ssu_reg_common_msg[i]; + if (!hclge_err_mod_check_support_cmd(msg.cmd, hdev)) + continue; + loop_time = 1; + loop_para[0] = 0; + if (msg.need_para) { + loop_time = hdev->ae_dev->dev_specs.tnl_num; + for (j = 0; j < loop_time; j++) + loop_para[j] = j + 1; + } + hclge_query_reg_info(hdev, &msg, loop_time, loop_para); + } +} + static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { { .module_id = MODULE_NONE, @@ -1210,7 +1629,8 @@ static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { .msg = "MODULE_GE" }, { .module_id = MODULE_IGU_EGU, - .msg = "MODULE_IGU_EGU" + .msg = "MODULE_IGU_EGU", + .query_reg_info = hclge_query_reg_info_of_ssu }, { .module_id = MODULE_LGE, .msg = "MODULE_LGE" @@ -1231,7 +1651,8 @@ static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { .msg = "MODULE_RTC" }, { .module_id = MODULE_SSU, - .msg = "MODULE_SSU" + .msg = "MODULE_SSU", + .query_reg_info = hclge_query_reg_info_of_ssu }, { .module_id = MODULE_TM, .msg = "MODULE_TM" @@ -2762,7 +3183,7 @@ void hclge_handle_occurred_error(struct hclge_dev *hdev) } static bool -hclge_handle_error_type_reg_log(struct device *dev, +hclge_handle_error_type_reg_log(struct hclge_dev *hdev, struct hclge_mod_err_info *mod_info, struct hclge_type_reg_err_info *type_reg_info) { @@ -2770,6 +3191,7 @@ hclge_handle_error_type_reg_log(struct device *dev, #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7 u8 mod_id, total_module, type_id, total_type, i, is_ras; + struct device *dev = &hdev->pdev->dev; u8 index_module = MODULE_NONE; u8 index_type = NONE_ERROR; bool cause_by_vf = false; @@ -2810,6 +3232,9 @@ hclge_handle_error_type_reg_log(struct device *dev, for (i = 0; i < type_reg_info->reg_num; i++) dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); + if (hclge_hw_module_id_st[index_module].query_reg_info) + hclge_hw_module_id_st[index_module].query_reg_info(hdev); + return cause_by_vf; } @@ -2850,7 +3275,7 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, type_reg_info = (struct hclge_type_reg_err_info *) &buf[offset++]; - if (hclge_handle_error_type_reg_log(dev, mod_info, + if (hclge_handle_error_type_reg_log(hdev, mod_info, type_reg_info)) cause_by_vf = true; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 68b738affa66..45a783a50643 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -5,6 +5,7 @@ 
#define __HCLGE_ERR_H #include "hclge_main.h" +#include "hclge_debugfs.h" #include "hnae3.h" #define HCLGE_MPF_RAS_INT_MIN_BD_NUM 10 @@ -115,6 +116,18 @@ #define HCLGE_REG_NUM_MAX 256 #define HCLGE_DESC_NO_DATA_LEN 8 +#define HCLGE_BD_NUM_SSU_REG_0 10 +#define HCLGE_BD_NUM_SSU_REG_1 15 +#define HCLGE_BD_NUM_RPU_REG_0 1 +#define HCLGE_BD_NUM_RPU_REG_1 2 +#define HCLGE_BD_NUM_IGU_EGU_REG 9 +#define HCLGE_BD_NUM_GEN_REG 8 +#define HCLGE_MOD_REG_INFO_LEN_MAX 256 +#define HCLGE_MOD_REG_EXTRA_LEN 11 +#define HCLGE_MOD_REG_VALUE_LEN 9 +#define HCLGE_MOD_REG_GROUP_MAX_SIZE 6 +#define HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE 8 + enum hclge_err_int_type { HCLGE_ERR_INT_MSIX = 0, HCLGE_ERR_INT_RAS_CE = 1, @@ -191,6 +204,7 @@ struct hclge_hw_error { struct hclge_hw_module_id { enum hclge_mod_name_list module_id; const char *msg; + void (*query_reg_info)(struct hclge_dev *hdev); }; struct hclge_hw_type_id { @@ -218,6 +232,28 @@ struct hclge_type_reg_err_info { u32 hclge_reg[HCLGE_REG_NUM_MAX]; }; +struct hclge_mod_reg_info { + const char *reg_name; + bool has_suffix; /* add suffix for register name */ + /* the positions of reg values in hclge_desc.data */ + u8 reg_offset_group[HCLGE_MOD_REG_GROUP_MAX_SIZE]; + u8 group_size; +}; + +/* This structure defines cmdq used to query the hardware module debug + * registers. + */ +struct hclge_mod_reg_common_msg { + enum hclge_opcode_type cmd; + struct hclge_desc *desc; + u8 bd_num; /* the bd number of hclge_desc used */ + bool need_para; /* whether this cmdq needs to add para */ + + /* the regs need to print */ + const struct hclge_mod_reg_info *result_regs; + u16 result_regs_size; +}; + int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en); int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state); int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en); -- cgit v1.2.3 From a1e5de0d07a3b2db047fe8ba0063d129672f979a Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 10 Apr 2024 20:53:54 +0800 Subject: net: hns3: add support to query scc version by devlink info Add support to query scc version by devlink info for device V3. Signed-off-by: Hao Chen Signed-off-by: Jijie Shao Link: https://lore.kernel.org/r/20240410125354.2177067-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/hns3.rst | 5 +++ drivers/net/ethernet/hisilicon/hns3/hnae3.h | 9 +++++ .../hisilicon/hns3/hns3_common/hclge_comm_cmd.h | 8 ++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c | 44 ++++++++++++++++++++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h | 2 + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 18 +++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 + 7 files changed, 84 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst index 4562a6e4782f..72bc1b9f3785 100644 --- a/Documentation/networking/devlink/hns3.rst +++ b/Documentation/networking/devlink/hns3.rst @@ -23,3 +23,8 @@ The ``hns3`` driver reports the following versions * - ``fw`` - running - Used to represent the firmware version. + * - ``fw.scc`` + - running + - Used to represent the Soft Congestion Control (SCC) firmware version. + SCC is a firmware component which provides multiple RDMA congestion + control algorithms, including DCQCN.
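For reference, the ``fw.scc`` string documented above is a dotted version decoded from a single 32-bit word, using the same byte-mask/shift scheme as the existing ``fw`` version (the SCC mask/shift pairs appear in the hnae3.h hunk below). The following minimal sketch illustrates that decoding in plain C; the helper name format_scc_version(), the sample value and the standalone main() are assumptions made for this illustration and are not code from the driver.

#include <stdio.h>
#include <stdint.h>

/* Split a 32-bit version word into its four byte fields and print them
 * as "a.b.c.d", mirroring the BYTE3..BYTE0 mask/shift pairs.
 */
static void format_scc_version(uint32_t ver, char *buf, size_t len)
{
	snprintf(buf, len, "%u.%u.%u.%u",
		 (ver >> 24) & 0xff,	/* BYTE3 */
		 (ver >> 16) & 0xff,	/* BYTE2 */
		 (ver >> 8) & 0xff,	/* BYTE1 */
		 ver & 0xff);		/* BYTE0 */
}

int main(void)
{
	char s[16];	/* fits "255.255.255.255" plus NUL */

	format_scc_version(0x01020304, s, sizeof(s));	/* hypothetical SCC word */
	printf("fw.scc: %s\n", s);	/* prints "fw.scc: 1.2.3.4" */
	return 0;
}

Run against the hypothetical value, this produces the same kind of string that ``devlink dev info`` would list under ``fw.scc``.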
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index e9266c65b331..7c2c8bea4c06 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -366,6 +366,15 @@ struct hnae3_vector_info { #define HNAE3_FW_VERSION_BYTE0_SHIFT 0 #define HNAE3_FW_VERSION_BYTE0_MASK GENMASK(7, 0) +#define HNAE3_SCC_VERSION_BYTE3_SHIFT 24 +#define HNAE3_SCC_VERSION_BYTE3_MASK GENMASK(31, 24) +#define HNAE3_SCC_VERSION_BYTE2_SHIFT 16 +#define HNAE3_SCC_VERSION_BYTE2_MASK GENMASK(23, 16) +#define HNAE3_SCC_VERSION_BYTE1_SHIFT 8 +#define HNAE3_SCC_VERSION_BYTE1_MASK GENMASK(15, 8) +#define HNAE3_SCC_VERSION_BYTE0_SHIFT 0 +#define HNAE3_SCC_VERSION_BYTE0_MASK GENMASK(7, 0) + struct hnae3_ring_chain_node { struct hnae3_ring_chain_node *next; u32 tqp_index; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h index a2bc5a9adaa3..2c2a2f1e0d7a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -247,6 +247,9 @@ enum hclge_opcode_type { HCLGE_OPC_QCN_AJUST_INIT = 0x1A07, HCLGE_OPC_QCN_DFX_CNT_STATUS = 0x1A08, + /* SCC commands */ + HCLGE_OPC_QUERY_SCC_VER = 0x1A84, + /* Mailbox command */ HCLGEVF_OPC_MBX_PF_TO_VF = 0x2000, HCLGEVF_OPC_MBX_VF_TO_PF = 0x2001, @@ -394,6 +397,11 @@ struct hclge_comm_query_version_cmd { __le32 caps[HCLGE_COMM_QUERY_CAP_LENGTH]; /* capabilities of device */ }; +struct hclge_comm_query_scc_cmd { + __le32 scc_version; + u8 rsv[20]; +}; + #define HCLGE_DESC_DATA_LEN 6 struct hclge_desc { __le16 opcode; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 9a939c0b217f..a1571c108678 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -5,6 +5,34 @@ #include "hclge_devlink.h" +static int hclge_devlink_scc_info_get(struct devlink *devlink, + struct devlink_info_req *req) +{ + struct hclge_devlink_priv *priv = devlink_priv(devlink); + char scc_version[HCLGE_DEVLINK_FW_SCC_LEN]; + struct hclge_dev *hdev = priv->hdev; + u32 scc_version_tmp; + int ret; + + ret = hclge_query_scc_version(hdev, &scc_version_tmp); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get scc version, ret = %d\n", ret); + return ret; + } + + snprintf(scc_version, sizeof(scc_version), "%lu.%lu.%lu.%lu", + hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE3_MASK, + HNAE3_FW_VERSION_BYTE3_SHIFT), + hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE2_MASK, + HNAE3_FW_VERSION_BYTE2_SHIFT), + hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE1_MASK, + HNAE3_FW_VERSION_BYTE1_SHIFT), + hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE0_MASK, + HNAE3_FW_VERSION_BYTE0_SHIFT)); + return devlink_info_version_running_put(req, "fw.scc", scc_version); +} + static int hclge_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -13,6 +41,7 @@ static int hclge_devlink_info_get(struct devlink *devlink, struct hclge_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; struct hclge_dev *hdev = priv->hdev; + int ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, @@ -24,9 +53,18 @@ static int 
hclge_devlink_info_get(struct devlink *devlink, hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, HNAE3_FW_VERSION_BYTE0_SHIFT)); - return devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW, - version_str); + ret = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); + if (ret) { + dev_err(&hdev->pdev->dev, "failed to set running version of fw\n"); + return ret; + } + + if (hdev->pdev->revision > HNAE3_DEVICE_VERSION_V2) + ret = hclge_devlink_scc_info_get(devlink, req); + + return ret; } static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h index 918be04507a5..148effa5ea89 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h @@ -6,6 +6,8 @@ #include "hclge_main.h" +#define HCLGE_DEVLINK_FW_SCC_LEN 32 + struct hclge_devlink_priv { struct hclge_dev *hdev; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index eb31a7e9c8fc..28336cf17170 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10883,6 +10883,24 @@ static u32 hclge_get_fw_version(struct hnae3_handle *handle) return hdev->fw_version; } +int hclge_query_scc_version(struct hclge_dev *hdev, u32 *scc_version) +{ + struct hclge_comm_query_scc_cmd *resp; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_SCC_VER, 1); + resp = (struct hclge_comm_query_scc_cmd *)desc.data; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + return ret; + + *scc_version = le32_to_cpu(resp->scc_version); + + return 0; +} + static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en) { struct phy_device *phydev = hdev->hw.mac.phydev; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index e821dd2f1528..df3c10098349 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1169,4 +1169,5 @@ int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en); int hclge_mac_update_stats(struct hclge_dev *hdev); struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf); int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type); +int hclge_query_scc_version(struct hclge_dev *hdev, u32 *scc_version); #endif -- cgit v1.2.3 From e36245dacd2cca4be716e4096b7dc6df9c8a7a6b Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Thu, 11 Apr 2024 10:52:55 +0000 Subject: nfp: flower: fix check for unsupported control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use flow_rule_is_supp_control_flags() Check the mask, not the key, for unsupported control flags. Only compile-tested, no access to HW Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Louis Peens Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 0aceef9fe582..8e0a890381b6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -527,10 +527,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, struct flow_match_control ctl; flow_rule_match_control(rule, &ctl); - if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag"); + + if (!flow_rule_is_supp_control_flags(NFP_FLOWER_SUPPORTED_CTLFLAGS, + ctl.mask->flags, extack)) return -EOPNOTSUPP; - } } ret_key_ls->key_layer = key_layer; -- cgit v1.2.3 From f8a5ea8c2a7f2ad65e0b3f4e5054b9ebfc87c25e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Thu, 11 Apr 2024 10:52:56 +0000 Subject: net: prestera: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add check for unsupported control flags. Only compile-tested, no access to HW. Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_flower.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 8b9455d8a4f7..418101a93149 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -229,6 +229,10 @@ static int prestera_flower_parse(struct prestera_flow_block *block, flow_rule_match_control(f_rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; } if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_BASIC)) { -- cgit v1.2.3 From 4ca78e61ec708c1d679506e0f1c69e69b9de70f9 Mon Sep 17 00:00:00 2001 From: John Fraker Date: Thu, 11 Apr 2024 22:32:29 -0700 Subject: gve: Correctly report software timestamping capabilities gve has supported software timestamp generation since its inception, but has not advertised that support via ethtool. This patch correctly advertises that support. Signed-off-by: John Fraker Reviewed-by: Harshitha Ramamurthy Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 815dead31673..299206d15c73 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -784,5 +784,6 @@ const struct ethtool_ops gve_ethtool_ops = { .set_tunable = gve_set_tunable, .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, - .get_link_ksettings = gve_get_link_ksettings + .get_link_ksettings = gve_get_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; -- cgit v1.2.3 From cd8ff81f747fdf5df75a634b9b90aef2716d84fd Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Fri, 12 Apr 2024 17:38:32 +0200 Subject: net: ethernet: ti: Add accessors for struct k3_cppi_desc_pool members This patch adds accessors for desc_size and cpumem members. They may be used, for instance, to compute a descriptor index. Reviewed-by: Jacob Keller Signed-off-by: Julien Panis Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/k3-cppi-desc-pool.c | 12 ++++++++++++ drivers/net/ethernet/ti/k3-cppi-desc-pool.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c index 05cc7aab1ec8..414bcac9dcc6 100644 --- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c +++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c @@ -132,5 +132,17 @@ size_t k3_cppi_desc_pool_avail(struct k3_cppi_desc_pool *pool) } EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_avail); +size_t k3_cppi_desc_pool_desc_size(const struct k3_cppi_desc_pool *pool) +{ + return pool->desc_size; +} +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_desc_size); + +void *k3_cppi_desc_pool_cpuaddr(const struct k3_cppi_desc_pool *pool) +{ + return pool->cpumem; +} +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_cpuaddr); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TI K3 CPPI5 descriptors pool API"); diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.h b/drivers/net/ethernet/ti/k3-cppi-desc-pool.h index a7e3fa5e7b62..3c6aed0bed71 100644 --- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.h +++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.h @@ -26,5 +26,7 @@ k3_cppi_desc_pool_dma2virt(struct k3_cppi_desc_pool *pool, dma_addr_t dma); void *k3_cppi_desc_pool_alloc(struct k3_cppi_desc_pool *pool); void k3_cppi_desc_pool_free(struct k3_cppi_desc_pool *pool, void *addr); size_t k3_cppi_desc_pool_avail(struct k3_cppi_desc_pool *pool); +size_t k3_cppi_desc_pool_desc_size(const struct k3_cppi_desc_pool *pool); +void *k3_cppi_desc_pool_cpuaddr(const struct k3_cppi_desc_pool *pool); #endif /* K3_CPPI_DESC_POOL_H_ */ -- cgit v1.2.3 From 84d767a3c0b5e6b7d13bcaa64405c0613138161f Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Fri, 12 Apr 2024 17:38:33 +0200 Subject: net: ethernet: ti: Add desc_infos member to struct k3_cppi_desc_pool This patch introduces a member and the related accessors which can be used to store descriptor specific additional information. This member can store, for instance, an ID to differentiate a skb TX buffer type from a xdpf TX buffer type. Signed-off-by: Julien Panis Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/k3-cppi-desc-pool.c | 34 ++++++++++++++++++++++++----- drivers/net/ethernet/ti/k3-cppi-desc-pool.h | 4 ++++ 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c index 414bcac9dcc6..739bae8e11ee 100644 --- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c +++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c @@ -22,6 +22,7 @@ struct k3_cppi_desc_pool { size_t mem_size; size_t num_desc; struct gen_pool *gen_pool; + void **desc_infos; }; void k3_cppi_desc_pool_destroy(struct k3_cppi_desc_pool *pool) @@ -37,7 +38,11 @@ void k3_cppi_desc_pool_destroy(struct k3_cppi_desc_pool *pool) dma_free_coherent(pool->dev, pool->mem_size, pool->cpumem, pool->dma_addr); + kfree(pool->desc_infos); + gen_pool_destroy(pool->gen_pool); /* frees pool->name */ + + kfree(pool); } EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_destroy); @@ -50,7 +55,7 @@ k3_cppi_desc_pool_create_name(struct device *dev, size_t size, const char *pool_name = NULL; int ret = -ENOMEM; - pool = devm_kzalloc(dev, sizeof(*pool), GFP_KERNEL); + pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return ERR_PTR(ret); @@ -62,18 +67,21 @@ k3_cppi_desc_pool_create_name(struct device *dev, size_t size, pool_name = kstrdup_const(name ? name : dev_name(pool->dev), GFP_KERNEL); if (!pool_name) - return ERR_PTR(-ENOMEM); + goto gen_pool_create_fail; pool->gen_pool = gen_pool_create(ilog2(pool->desc_size), -1); if (!pool->gen_pool) { - ret = -ENOMEM; - dev_err(pool->dev, "pool create failed %d\n", ret); kfree_const(pool_name); goto gen_pool_create_fail; } pool->gen_pool->name = pool_name; + pool->desc_infos = kcalloc(pool->num_desc, + sizeof(*pool->desc_infos), GFP_KERNEL); + if (!pool->desc_infos) + goto gen_pool_desc_infos_alloc_fail; + pool->cpumem = dma_alloc_coherent(pool->dev, pool->mem_size, &pool->dma_addr, GFP_KERNEL); @@ -94,9 +102,11 @@ gen_pool_add_virt_fail: dma_free_coherent(pool->dev, pool->mem_size, pool->cpumem, pool->dma_addr); dma_alloc_fail: + kfree(pool->desc_infos); +gen_pool_desc_infos_alloc_fail: gen_pool_destroy(pool->gen_pool); /* frees pool->name */ gen_pool_create_fail: - devm_kfree(pool->dev, pool); + kfree(pool); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_create_name); @@ -144,5 +154,19 @@ void *k3_cppi_desc_pool_cpuaddr(const struct k3_cppi_desc_pool *pool) } EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_cpuaddr); +void k3_cppi_desc_pool_desc_info_set(struct k3_cppi_desc_pool *pool, + int desc_idx, void *info) +{ + pool->desc_infos[desc_idx] = info; +} +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_desc_info_set); + +void *k3_cppi_desc_pool_desc_info(const struct k3_cppi_desc_pool *pool, + int desc_idx) +{ + return pool->desc_infos[desc_idx]; +} +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_desc_info); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TI K3 CPPI5 descriptors pool API"); diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.h b/drivers/net/ethernet/ti/k3-cppi-desc-pool.h index 3c6aed0bed71..851d352b338b 100644 --- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.h +++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.h @@ -28,5 +28,9 @@ void k3_cppi_desc_pool_free(struct k3_cppi_desc_pool *pool, void *addr); size_t k3_cppi_desc_pool_avail(struct k3_cppi_desc_pool *pool); size_t k3_cppi_desc_pool_desc_size(const struct k3_cppi_desc_pool *pool); void *k3_cppi_desc_pool_cpuaddr(const struct k3_cppi_desc_pool *pool); +void k3_cppi_desc_pool_desc_info_set(struct k3_cppi_desc_pool 
*pool, + int desc_idx, void *info); +void *k3_cppi_desc_pool_desc_info(const struct k3_cppi_desc_pool *pool, + int desc_idx); #endif /* K3_CPPI_DESC_POOL_H_ */ -- cgit v1.2.3 From 8acacc40f7337527ff84cd901ed2ef0a2b95b2b6 Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Fri, 12 Apr 2024 17:38:34 +0200 Subject: net: ethernet: ti: am65-cpsw: Add minimal XDP support This patch adds XDP (eXpress Data Path) support to TI AM65 CPSW Ethernet driver. The following features are implemented: - NETDEV_XDP_ACT_BASIC (XDP_PASS, XDP_TX, XDP_DROP, XDP_ABORTED) - NETDEV_XDP_ACT_REDIRECT (XDP_REDIRECT) - NETDEV_XDP_ACT_NDO_XMIT (ndo_xdp_xmit callback) The page pool memory model is used to get better performance. Below are benchmark results obtained for the receiver with iperf3 default parameters: - Without page pool: 495 Mbits/sec - With page pool: 605 Mbits/sec (actually 610 Mbits/sec, with a 5 Mbits/sec loss due to extra processing in the hot path to handle XDP). Signed-off-by: Julien Panis Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 659 ++++++++++++++++++++++++++----- drivers/net/ethernet/ti/am65-cpsw-nuss.h | 13 + 2 files changed, 576 insertions(+), 96 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2939a21ca74f..bfba883d4fc4 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -5,6 +5,7 @@ * */ +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include "cpsw_ale.h" @@ -138,6 +140,18 @@ #define AM65_CPSW_DEFAULT_TX_CHNS 8 +/* CPPI streaming packet interface */ +#define AM65_CPSW_CPPI_TX_FLOW_ID 0x3FFF +#define AM65_CPSW_CPPI_TX_PKT_TYPE 0x7 + +/* XDP */ +#define AM65_CPSW_XDP_CONSUMED 2 +#define AM65_CPSW_XDP_REDIRECT 1 +#define AM65_CPSW_XDP_PASS 0 + +/* Include headroom compatible with both skb and xdpf */ +#define AM65_CPSW_HEADROOM (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) + static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) { @@ -305,12 +319,11 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, } static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, - struct sk_buff *skb) + struct page *page) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; struct cppi5_host_desc_t *desc_rx; struct device *dev = common->dev; - u32 pkt_len = skb_tailroom(skb); dma_addr_t desc_dma; dma_addr_t buf_dma; void *swdata; @@ -322,20 +335,22 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, } desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); - buf_dma = dma_map_single(rx_chn->dma_dev, skb->data, pkt_len, - DMA_FROM_DEVICE); + buf_dma = dma_map_single(rx_chn->dma_dev, + page_address(page) + AM65_CPSW_HEADROOM, + AM65_CPSW_MAX_PACKET_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rx_chn->dma_dev, buf_dma))) { k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - dev_err(dev, "Failed to map rx skb buffer\n"); + dev_err(dev, "Failed to map rx buffer\n"); return -EINVAL; } cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, AM65_CPSW_NAV_PS_DATA_SIZE); k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); - cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, AM65_CPSW_MAX_PACKET_SIZE, + buf_dma, AM65_CPSW_MAX_PACKET_SIZE); swdata = 
cppi5_hdesc_get_swdata(desc_rx); - *((void **)swdata) = skb; + *((void **)swdata) = page_address(page); return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, 0, desc_rx, desc_dma); } @@ -369,25 +384,131 @@ static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common); static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port); static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port); +static void am65_cpsw_destroy_xdp_rxqs(struct am65_cpsw_common *common) +{ + struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct xdp_rxq_info *rxq; + int i; + + for (i = 0; i < common->port_num; i++) { + rxq = &common->ports[i].xdp_rxq; + + if (xdp_rxq_info_is_reg(rxq)) + xdp_rxq_info_unreg(rxq); + } + + if (rx_chn->page_pool) { + page_pool_destroy(rx_chn->page_pool); + rx_chn->page_pool = NULL; + } +} + +static int am65_cpsw_create_xdp_rxqs(struct am65_cpsw_common *common) +{ + struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP, + .order = 0, + .pool_size = AM65_CPSW_MAX_RX_DESC, + .nid = dev_to_node(common->dev), + .dev = common->dev, + .dma_dir = DMA_BIDIRECTIONAL, + .napi = &common->napi_rx, + }; + struct xdp_rxq_info *rxq; + struct page_pool *pool; + int i, ret; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + rx_chn->page_pool = pool; + + for (i = 0; i < common->port_num; i++) { + rxq = &common->ports[i].xdp_rxq; + + ret = xdp_rxq_info_reg(rxq, common->ports[i].ndev, i, 0); + if (ret) + goto err; + + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + goto err; + } + + return 0; + +err: + am65_cpsw_destroy_xdp_rxqs(common); + return ret; +} + +static int am65_cpsw_nuss_desc_idx(struct k3_cppi_desc_pool *desc_pool, + void *desc, + unsigned char dsize_log2) +{ + void *pool_addr = k3_cppi_desc_pool_cpuaddr(desc_pool); + + return (desc - pool_addr) >> dsize_log2; +} + +static void am65_cpsw_nuss_set_buf_type(struct am65_cpsw_tx_chn *tx_chn, + struct cppi5_host_desc_t *desc, + enum am65_cpsw_tx_buf_type buf_type) +{ + int desc_idx; + + desc_idx = am65_cpsw_nuss_desc_idx(tx_chn->desc_pool, desc, + tx_chn->dsize_log2); + k3_cppi_desc_pool_desc_info_set(tx_chn->desc_pool, desc_idx, + (void *)buf_type); +} + +static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_chn *tx_chn, + dma_addr_t desc_dma) +{ + struct cppi5_host_desc_t *desc_tx; + int desc_idx; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); + desc_idx = am65_cpsw_nuss_desc_idx(tx_chn->desc_pool, desc_tx, + tx_chn->dsize_log2); + + return (enum am65_cpsw_tx_buf_type)k3_cppi_desc_pool_desc_info(tx_chn->desc_pool, + desc_idx); +} + +static inline void am65_cpsw_put_page(struct am65_cpsw_rx_chn *rx_chn, + struct page *page, + bool allow_direct, + int desc_idx) +{ + page_pool_put_full_page(rx_chn->page_pool, page, allow_direct); + rx_chn->pages[desc_idx] = NULL; +} + static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) { struct am65_cpsw_rx_chn *rx_chn = data; struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb; dma_addr_t buf_dma; u32 buf_dma_len; + void *page_addr; void **swdata; + int desc_idx; desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; + page_addr = *swdata; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, 
DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - dev_kfree_skb_any(skb); + desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx, + rx_chn->dsize_log2); + am65_cpsw_put_page(rx_chn, virt_to_page(page_addr), false, desc_idx); } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, @@ -440,12 +561,32 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) dev_kfree_skb_any(skb); } +static struct sk_buff *am65_cpsw_build_skb(void *page_addr, + struct net_device *ndev, + unsigned int len) +{ + struct sk_buff *skb; + + len += AM65_CPSW_HEADROOM; + + skb = build_skb(page_addr, len); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, AM65_CPSW_HEADROOM); + skb->dev = ndev; + + return skb; +} + static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) { struct am65_cpsw_host *host_p = am65_common_get_host(common); + struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chn = common->tx_chns; int port_idx, i, ret, tx; - struct sk_buff *skb; u32 val, port_mask; + struct page *page; if (common->usage_count) return 0; @@ -505,25 +646,29 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) am65_cpsw_qos_tx_p0_rate_init(common); - for (i = 0; i < common->rx_chns.descs_num; i++) { - skb = __netdev_alloc_skb_ip_align(NULL, - AM65_CPSW_MAX_PACKET_SIZE, - GFP_KERNEL); - if (!skb) { + ret = am65_cpsw_create_xdp_rxqs(common); + if (ret) { + dev_err(common->dev, "Failed to create XDP rx queues\n"); + return ret; + } + + for (i = 0; i < rx_chn->descs_num; i++) { + page = page_pool_dev_alloc_pages(rx_chn->page_pool); + if (!page) { ret = -ENOMEM; - dev_err(common->dev, "cannot allocate skb\n"); if (i) goto fail_rx; return ret; } + rx_chn->pages[i] = page; - ret = am65_cpsw_nuss_rx_push(common, skb); + ret = am65_cpsw_nuss_rx_push(common, page); if (ret < 0) { dev_err(common->dev, - "cannot submit skb to channel rx, error %d\n", + "cannot submit page to channel rx: %d\n", ret); - kfree_skb(skb); + am65_cpsw_put_page(rx_chn, page, false, i); if (i) goto fail_rx; @@ -531,27 +676,27 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) } } - ret = k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn); + ret = k3_udma_glue_enable_rx_chn(rx_chn->rx_chn); if (ret) { dev_err(common->dev, "couldn't enable rx chn: %d\n", ret); goto fail_rx; } for (tx = 0; tx < common->tx_ch_num; tx++) { - ret = k3_udma_glue_enable_tx_chn(common->tx_chns[tx].tx_chn); + ret = k3_udma_glue_enable_tx_chn(tx_chn[tx].tx_chn); if (ret) { dev_err(common->dev, "couldn't enable tx chn %d: %d\n", tx, ret); tx--; goto fail_tx; } - napi_enable(&common->tx_chns[tx].napi_tx); + napi_enable(&tx_chn[tx].napi_tx); } napi_enable(&common->napi_rx); if (common->rx_irq_disabled) { common->rx_irq_disabled = false; - enable_irq(common->rx_chns.irq); + enable_irq(rx_chn->irq); } dev_dbg(common->dev, "cpsw_nuss started\n"); @@ -559,22 +704,23 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) fail_tx: while (tx >= 0) { - napi_disable(&common->tx_chns[tx].napi_tx); - k3_udma_glue_disable_tx_chn(common->tx_chns[tx].tx_chn); + napi_disable(&tx_chn[tx].napi_tx); + k3_udma_glue_disable_tx_chn(tx_chn[tx].tx_chn); tx--; } - k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); + k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); fail_rx: - k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, 0, - &common->rx_chns, + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, 0, rx_chn, am65_cpsw_nuss_rx_cleanup, 0); return ret; 
} static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) { + struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct am65_cpsw_tx_chn *tx_chn = common->tx_chns; int i; if (common->usage_count != 1) @@ -590,26 +736,25 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) reinit_completion(&common->tdown_complete); for (i = 0; i < common->tx_ch_num; i++) - k3_udma_glue_tdown_tx_chn(common->tx_chns[i].tx_chn, false); + k3_udma_glue_tdown_tx_chn(tx_chn[i].tx_chn, false); i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); if (!i) dev_err(common->dev, "tx timeout\n"); for (i = 0; i < common->tx_ch_num; i++) { - napi_disable(&common->tx_chns[i].napi_tx); - hrtimer_cancel(&common->tx_chns[i].tx_hrtimer); + napi_disable(&tx_chn[i].napi_tx); + hrtimer_cancel(&tx_chn[i].tx_hrtimer); } for (i = 0; i < common->tx_ch_num; i++) { - k3_udma_glue_reset_tx_chn(common->tx_chns[i].tx_chn, - &common->tx_chns[i], + k3_udma_glue_reset_tx_chn(tx_chn[i].tx_chn, &tx_chn[i], am65_cpsw_nuss_tx_cleanup); - k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn); + k3_udma_glue_disable_tx_chn(tx_chn[i].tx_chn); } reinit_completion(&common->tdown_complete); - k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true); + k3_udma_glue_tdown_rx_chn(rx_chn->rx_chn, true); if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) { i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); @@ -621,17 +766,22 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) hrtimer_cancel(&common->rx_hrtimer); for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) - k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, i, - &common->rx_chns, + k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn, am65_cpsw_nuss_rx_cleanup, !!i); - k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); + k3_udma_glue_disable_rx_chn(rx_chn->rx_chn); cpsw_ale_stop(common->ale); writel(0, common->cpsw_base + AM65_CPSW_REG_CTL); writel(0, common->cpsw_base + AM65_CPSW_REG_STAT_PORT_EN); + for (i = 0; i < rx_chn->descs_num; i++) { + if (rx_chn->pages[i]) + am65_cpsw_put_page(rx_chn, rx_chn->pages[i], false, i); + } + am65_cpsw_destroy_xdp_rxqs(common); + dev_dbg(common->dev, "cpsw_nuss stopped\n"); return 0; } @@ -749,6 +899,151 @@ runtime_put: return ret; } +static int am65_cpsw_xdp_tx_frame(struct net_device *ndev, + struct am65_cpsw_tx_chn *tx_chn, + struct xdp_frame *xdpf, + enum am65_cpsw_tx_buf_type buf_type) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct cppi5_host_desc_t *host_desc; + struct netdev_queue *netif_txq; + dma_addr_t dma_desc, dma_buf; + u32 pkt_len = xdpf->len; + void **swdata; + int ret; + + host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); + if (unlikely(!host_desc)) { + ndev->stats.tx_dropped++; + return -ENOMEM; + } + + am65_cpsw_nuss_set_buf_type(tx_chn, host_desc, buf_type); + + dma_buf = dma_map_single(tx_chn->dma_dev, xdpf->data, + pkt_len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_chn->dma_dev, dma_buf))) { + ndev->stats.tx_dropped++; + ret = -ENOMEM; + goto pool_free; + } + + cppi5_hdesc_init(host_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, + AM65_CPSW_NAV_PS_DATA_SIZE); + cppi5_hdesc_set_pkttype(host_desc, AM65_CPSW_CPPI_TX_PKT_TYPE); + cppi5_hdesc_set_pktlen(host_desc, pkt_len); + cppi5_desc_set_pktids(&host_desc->hdr, 0, AM65_CPSW_CPPI_TX_FLOW_ID); + cppi5_desc_set_tags_ids(&host_desc->hdr, 0, port->port_id); + + 
k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &dma_buf); + cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, pkt_len); + + swdata = cppi5_hdesc_get_swdata(host_desc); + *(swdata) = xdpf; + + /* Report BQL before sending the packet */ + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + netdev_tx_sent_queue(netif_txq, pkt_len); + + dma_desc = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, host_desc); + if (AM65_CPSW_IS_CPSW2G(common)) { + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, host_desc, + dma_desc); + } else { + spin_lock_bh(&tx_chn->lock); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, host_desc, + dma_desc); + spin_unlock_bh(&tx_chn->lock); + } + if (ret) { + /* Inform BQL */ + netdev_tx_completed_queue(netif_txq, 1, pkt_len); + ndev->stats.tx_errors++; + goto dma_unmap; + } + + return 0; + +dma_unmap: + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &dma_buf); + dma_unmap_single(tx_chn->dma_dev, dma_buf, pkt_len, DMA_TO_DEVICE); +pool_free: + k3_cppi_desc_pool_free(tx_chn->desc_pool, host_desc); + return ret; +} + +static int am65_cpsw_run_xdp(struct am65_cpsw_common *common, + struct am65_cpsw_port *port, + struct xdp_buff *xdp, + int desc_idx, int cpu, int *len) +{ + struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; + struct net_device *ndev = port->ndev; + int ret = AM65_CPSW_XDP_CONSUMED; + struct am65_cpsw_tx_chn *tx_chn; + struct netdev_queue *netif_txq; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + struct page *page; + u32 act; + + prog = READ_ONCE(port->xdp_prog); + if (!prog) + return AM65_CPSW_XDP_PASS; + + act = bpf_prog_run_xdp(prog, xdp); + /* XDP prog might have changed packet data and boundaries */ + *len = xdp->data_end - xdp->data; + + switch (act) { + case XDP_PASS: + ret = AM65_CPSW_XDP_PASS; + goto out; + case XDP_TX: + tx_chn = &common->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES]; + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + break; + + __netif_tx_lock(netif_txq, cpu); + ret = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, + AM65_CPSW_TX_BUF_TYPE_XDP_TX); + __netif_tx_unlock(netif_txq); + if (ret) + break; + + ndev->stats.rx_bytes += *len; + ndev->stats.rx_packets++; + ret = AM65_CPSW_XDP_CONSUMED; + goto out; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(ndev, xdp, prog))) + break; + + ndev->stats.rx_bytes += *len; + ndev->stats.rx_packets++; + ret = AM65_CPSW_XDP_REDIRECT; + goto out; + default: + bpf_warn_invalid_xdp_action(ndev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + fallthrough; + case XDP_DROP: + ndev->stats.rx_dropped++; + } + + page = virt_to_head_page(xdp->data); + am65_cpsw_put_page(rx_chn, page, true, desc_idx); + +out: + return ret; +} + static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata) { struct skb_shared_hwtstamps *ssh; @@ -795,7 +1090,7 @@ static void am65_cpsw_nuss_rx_csum(struct sk_buff *skb, u32 csum_info) } static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, - u32 flow_idx) + u32 flow_idx, int cpu) { struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns; u32 buf_dma_len, pkt_len, port_id = 0, csum_info; @@ -803,13 +1098,16 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, struct am65_cpsw_ndev_stats *stats; struct cppi5_host_desc_t *desc_rx; struct device *dev = common->dev; - struct sk_buff *skb, *new_skb; + struct page *page, *new_page; dma_addr_t desc_dma, buf_dma; struct am65_cpsw_port *port; + int headroom, desc_idx, ret; struct 
net_device *ndev; + struct sk_buff *skb; + struct xdp_buff xdp; + void *page_addr; void **swdata; u32 *psdata; - int ret = 0; ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_idx, &desc_dma); if (ret) { @@ -830,7 +1128,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, __func__, flow_idx, &desc_dma); swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; + page_addr = *swdata; + page = virt_to_page(page_addr); cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); pkt_len = cppi5_hdesc_get_pktlen(desc_rx); @@ -838,12 +1137,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, dev_dbg(dev, "%s rx port_id:%d\n", __func__, port_id); port = am65_common_get_port(common, port_id); ndev = port->ndev; - skb->dev = ndev; - psdata = cppi5_hdesc_get_psdata(desc_rx); - /* add RX timestamp */ - if (port->rx_ts_enabled) - am65_cpsw_nuss_rx_ts(skb, psdata); csum_info = psdata[2]; dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); @@ -851,36 +1145,66 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - new_skb = netdev_alloc_skb_ip_align(ndev, AM65_CPSW_MAX_PACKET_SIZE); - if (new_skb) { - ndev_priv = netdev_priv(ndev); - am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); - skb_put(skb, pkt_len); - skb->protocol = eth_type_trans(skb, ndev); - am65_cpsw_nuss_rx_csum(skb, csum_info); - napi_gro_receive(&common->napi_rx, skb); - - stats = this_cpu_ptr(ndev_priv->stats); - - u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += pkt_len; - u64_stats_update_end(&stats->syncp); - kmemleak_not_leak(new_skb); - } else { - ndev->stats.rx_dropped++; - new_skb = skb; + desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx, + rx_chn->dsize_log2); + + skb = am65_cpsw_build_skb(page_addr, ndev, + AM65_CPSW_MAX_PACKET_SIZE); + if (unlikely(!skb)) { + new_page = page; + goto requeue; + } + + if (port->xdp_prog) { + xdp_init_buff(&xdp, AM65_CPSW_MAX_PACKET_SIZE, &port->xdp_rxq); + + xdp_prepare_buff(&xdp, page_addr, skb_headroom(skb), + pkt_len, false); + + ret = am65_cpsw_run_xdp(common, port, &xdp, desc_idx, + cpu, &pkt_len); + if (ret != AM65_CPSW_XDP_PASS) + return ret; + + /* Compute additional headroom to be reserved */ + headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb); + skb_reserve(skb, headroom); } + /* Pass skb to netstack if no XDP prog or returned XDP_PASS */ + if (port->rx_ts_enabled) + am65_cpsw_nuss_rx_ts(skb, psdata); + + ndev_priv = netdev_priv(ndev); + am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); + skb_put(skb, pkt_len); + skb_mark_for_recycle(skb); + skb->protocol = eth_type_trans(skb, ndev); + am65_cpsw_nuss_rx_csum(skb, csum_info); + napi_gro_receive(&common->napi_rx, skb); + + stats = this_cpu_ptr(ndev_priv->stats); + + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += pkt_len; + u64_stats_update_end(&stats->syncp); + + new_page = page_pool_dev_alloc_pages(rx_chn->page_pool); + if (unlikely(!new_page)) + return -ENOMEM; + rx_chn->pages[desc_idx] = new_page; + if (netif_dormant(ndev)) { - dev_kfree_skb_any(new_skb); + am65_cpsw_put_page(rx_chn, new_page, true, desc_idx); ndev->stats.rx_dropped++; return 0; } - ret = am65_cpsw_nuss_rx_push(common, new_skb); +requeue: + ret = am65_cpsw_nuss_rx_push(common, new_page); if (WARN_ON(ret < 0)) { - dev_kfree_skb_any(new_skb); + 
am65_cpsw_put_page(rx_chn, new_page, true, desc_idx); ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } @@ -901,6 +1225,8 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) { struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx); int flow = AM65_CPSW_MAX_RX_FLOWS; + int cpu = smp_processor_id(); + bool xdp_redirect = false; int cur_budget, ret; int num_rx = 0; @@ -909,9 +1235,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) cur_budget = budget - num_rx; while (cur_budget--) { - ret = am65_cpsw_nuss_rx_packets(common, flow); - if (ret) + ret = am65_cpsw_nuss_rx_packets(common, flow, cpu); + if (ret) { + if (ret == AM65_CPSW_XDP_REDIRECT) + xdp_redirect = true; break; + } num_rx++; } @@ -919,6 +1248,9 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) break; } + if (xdp_redirect) + xdp_do_flush(); + dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { @@ -938,8 +1270,8 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) } static struct sk_buff * -am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, - dma_addr_t desc_dma) +am65_cpsw_nuss_tx_compl_packet_skb(struct am65_cpsw_tx_chn *tx_chn, + dma_addr_t desc_dma) { struct am65_cpsw_ndev_priv *ndev_priv; struct am65_cpsw_ndev_stats *stats; @@ -968,6 +1300,39 @@ am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, return skb; } +static struct xdp_frame * +am65_cpsw_nuss_tx_compl_packet_xdp(struct am65_cpsw_common *common, + struct am65_cpsw_tx_chn *tx_chn, + dma_addr_t desc_dma, + struct net_device **ndev) +{ + struct am65_cpsw_ndev_priv *ndev_priv; + struct am65_cpsw_ndev_stats *stats; + struct cppi5_host_desc_t *desc_tx; + struct am65_cpsw_port *port; + struct xdp_frame *xdpf; + u32 port_id = 0; + void **swdata; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); + cppi5_desc_get_tags_ids(&desc_tx->hdr, NULL, &port_id); + swdata = cppi5_hdesc_get_swdata(desc_tx); + xdpf = *(swdata); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + + port = am65_common_get_port(common, port_id); + *ndev = port->ndev; + + ndev_priv = netdev_priv(*ndev); + stats = this_cpu_ptr(ndev_priv->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += xdpf->len; + u64_stats_update_end(&stats->syncp); + + return xdpf; +} + static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_device *ndev, struct netdev_queue *netif_txq) { @@ -988,11 +1353,13 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, int chn, unsigned int budget, bool *tdown) { + enum am65_cpsw_tx_buf_type buf_type; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; unsigned int total_bytes = 0; struct net_device *ndev; + struct xdp_frame *xdpf; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -1013,10 +1380,21 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, break; } - skb = am65_cpsw_nuss_tx_compl_packet(tx_chn, desc_dma); - total_bytes = skb->len; - ndev = skb->dev; - napi_consume_skb(skb, budget); + buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); + if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); + ndev = skb->dev; + total_bytes = skb->len; + 
napi_consume_skb(skb, budget); + } else { + xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, + desc_dma, &ndev); + total_bytes = xdpf->len; + if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) + xdp_return_frame_rx_napi(xdpf); + else + xdp_return_frame(xdpf); + } num_tx++; netif_txq = netdev_get_tx_queue(ndev, chn); @@ -1034,11 +1412,13 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, int chn, unsigned int budget, bool *tdown) { + enum am65_cpsw_tx_buf_type buf_type; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; unsigned int total_bytes = 0; struct net_device *ndev; + struct xdp_frame *xdpf; struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; @@ -1057,11 +1437,21 @@ static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, break; } - skb = am65_cpsw_nuss_tx_compl_packet(tx_chn, desc_dma); - - ndev = skb->dev; - total_bytes += skb->len; - napi_consume_skb(skb, budget); + buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); + if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + skb = am65_cpsw_nuss_tx_compl_packet_skb(tx_chn, desc_dma); + ndev = skb->dev; + total_bytes += skb->len; + napi_consume_skb(skb, budget); + } else { + xdpf = am65_cpsw_nuss_tx_compl_packet_xdp(common, tx_chn, + desc_dma, &ndev); + total_bytes += xdpf->len; + if (buf_type == AM65_CPSW_TX_BUF_TYPE_XDP_TX) + xdp_return_frame_rx_napi(xdpf); + else + xdp_return_frame(xdpf); + } num_tx++; } @@ -1183,10 +1573,13 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, goto busy_stop_q; } + am65_cpsw_nuss_set_buf_type(tx_chn, first_desc, + AM65_CPSW_TX_BUF_TYPE_SKB); + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, AM65_CPSW_NAV_PS_DATA_SIZE); - cppi5_desc_set_pktids(&first_desc->hdr, 0, 0x3FFF); - cppi5_hdesc_set_pkttype(first_desc, 0x7); + cppi5_desc_set_pktids(&first_desc->hdr, 0, AM65_CPSW_CPPI_TX_FLOW_ID); + cppi5_hdesc_set_pkttype(first_desc, AM65_CPSW_CPPI_TX_PKT_TYPE); cppi5_desc_set_tags_ids(&first_desc->hdr, 0, port->port_id); k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); @@ -1225,6 +1618,9 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, goto busy_free_descs; } + am65_cpsw_nuss_set_buf_type(tx_chn, next_desc, + AM65_CPSW_TX_BUF_TYPE_SKB); + buf_dma = skb_frag_dma_map(tx_chn->dma_dev, frag, 0, frag_size, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(tx_chn->dma_dev, buf_dma))) { @@ -1488,6 +1884,59 @@ static void am65_cpsw_nuss_ndo_get_stats(struct net_device *dev, stats->tx_dropped = dev->stats.tx_dropped; } +static int am65_cpsw_xdp_prog_setup(struct net_device *ndev, + struct bpf_prog *prog) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + bool running = netif_running(ndev); + struct bpf_prog *old_prog; + + if (running) + am65_cpsw_nuss_ndo_slave_stop(ndev); + + old_prog = xchg(&port->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (running) + return am65_cpsw_nuss_ndo_slave_open(ndev); + + return 0; +} + +static int am65_cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case XDP_SETUP_PROG: + return am65_cpsw_xdp_prog_setup(ndev, bpf->prog); + default: + return -EINVAL; + } +} + +static int am65_cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct am65_cpsw_tx_chn *tx_chn; + struct netdev_queue *netif_txq; + int cpu = 
smp_processor_id(); + int i, nxmit = 0; + + tx_chn = &am65_ndev_to_common(ndev)->tx_chns[cpu % AM65_CPSW_MAX_TX_QUEUES]; + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + + __netif_tx_lock(netif_txq, cpu); + for (i = 0; i < n; i++) { + if (am65_cpsw_xdp_tx_frame(ndev, tx_chn, frames[i], + AM65_CPSW_TX_BUF_TYPE_XDP_NDO)) + break; + nxmit++; + } + __netif_tx_unlock(netif_txq); + + return nxmit; +} + static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_open = am65_cpsw_nuss_ndo_slave_open, .ndo_stop = am65_cpsw_nuss_ndo_slave_stop, @@ -1502,6 +1951,8 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_eth_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, .ndo_set_tx_maxrate = am65_cpsw_qos_ndo_tx_p0_set_maxrate, + .ndo_bpf = am65_cpsw_ndo_bpf, + .ndo_xdp_xmit = am65_cpsw_ndo_xdp_xmit, }; static void am65_cpsw_disable_phy(struct phy *phy) @@ -1772,7 +2223,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) .mode = K3_RINGACC_RING_MODE_RING, .flags = 0 }; - u32 hdesc_size; + u32 hdesc_size, hdesc_size_out; int i, ret = 0; hdesc_size = cppi5_hdesc_calc_size(true, AM65_CPSW_NAV_PS_DATA_SIZE, @@ -1816,6 +2267,10 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) goto err; } + hdesc_size_out = k3_cppi_desc_pool_desc_size(tx_chn->desc_pool); + tx_chn->dsize_log2 = __fls(hdesc_size_out); + WARN_ON(hdesc_size_out != (1 << tx_chn->dsize_log2)); + tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); if (tx_chn->irq < 0) { dev_err(dev, "Failed to get tx dma irq %d\n", @@ -1862,8 +2317,8 @@ static void am65_cpsw_nuss_free_rx_chns(void *data) static void am65_cpsw_nuss_remove_rx_chns(void *data) { struct am65_cpsw_common *common = data; - struct am65_cpsw_rx_chn *rx_chn; struct device *dev = common->dev; + struct am65_cpsw_rx_chn *rx_chn; rx_chn = &common->rx_chns; devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common); @@ -1873,11 +2328,7 @@ static void am65_cpsw_nuss_remove_rx_chns(void *data) netif_napi_del(&common->napi_rx); - if (!IS_ERR_OR_NULL(rx_chn->desc_pool)) - k3_cppi_desc_pool_destroy(rx_chn->desc_pool); - - if (!IS_ERR_OR_NULL(rx_chn->rx_chn)) - k3_udma_glue_release_rx_chn(rx_chn->rx_chn); + am65_cpsw_nuss_free_rx_chns(common); common->rx_flow_id_base = -1; } @@ -1888,7 +2339,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) struct k3_udma_glue_rx_channel_cfg rx_cfg = { 0 }; u32 max_desc_num = AM65_CPSW_MAX_RX_DESC; struct device *dev = common->dev; - u32 hdesc_size; + u32 hdesc_size, hdesc_size_out; u32 fdqring_id; int i, ret = 0; @@ -1920,6 +2371,17 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) goto err; } + hdesc_size_out = k3_cppi_desc_pool_desc_size(rx_chn->desc_pool); + rx_chn->dsize_log2 = __fls(hdesc_size_out); + WARN_ON(hdesc_size_out != (1 << rx_chn->dsize_log2)); + + rx_chn->page_pool = NULL; + + rx_chn->pages = devm_kcalloc(dev, rx_chn->descs_num, + sizeof(*rx_chn->pages), GFP_KERNEL); + if (!rx_chn->pages) + return -ENOMEM; + common->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); dev_info(dev, "set new flow-id-base %u\n", common->rx_flow_id_base); @@ -2252,6 +2714,9 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) NETIF_F_HW_TC; port->ndev->features = port->ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; + port->ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; port->ndev->vlan_features |= 
NETIF_F_SG; port->ndev->netdev_ops = &am65_cpsw_nuss_netdev_ops; port->ndev->ethtool_ops = &am65_cpsw_ethtool_ops_slave; @@ -2315,6 +2780,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) if (ret) dev_err(dev, "failed to add percpu stat free action %d\n", ret); + port->xdp_prog = NULL; + if (!common->dma_ndev) common->dma_ndev = port->ndev; @@ -2940,9 +3407,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) struct device_node *node; struct resource *res; struct clk *clk; + int ale_entries; u64 id_temp; int ret, i; - int ale_entries; common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); if (!common) @@ -3154,10 +3621,10 @@ static int am65_cpsw_nuss_suspend(struct device *dev) static int am65_cpsw_nuss_resume(struct device *dev) { struct am65_cpsw_common *common = dev_get_drvdata(dev); + struct am65_cpsw_host *host_p = am65_common_get_host(common); struct am65_cpsw_port *port; struct net_device *ndev; int i, ret; - struct am65_cpsw_host *host_p = am65_common_get_host(common); ret = am65_cpsw_nuss_init_tx_chns(common); if (ret) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 7da0492dc091..d8ce88dc9c89 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "am65-cpsw-qos.h" struct am65_cpts; @@ -56,10 +57,18 @@ struct am65_cpsw_port { bool rx_ts_enabled; struct am65_cpsw_qos qos; struct devlink_port devlink_port; + struct bpf_prog *xdp_prog; + struct xdp_rxq_info xdp_rxq; /* Only for suspend resume context */ u32 vid_context; }; +enum am65_cpsw_tx_buf_type { + AM65_CPSW_TX_BUF_TYPE_SKB, + AM65_CPSW_TX_BUF_TYPE_XDP_TX, + AM65_CPSW_TX_BUF_TYPE_XDP_NDO, +}; + struct am65_cpsw_host { struct am65_cpsw_common *common; void __iomem *port_base; @@ -80,6 +89,7 @@ struct am65_cpsw_tx_chn { int irq; u32 id; u32 descs_num; + unsigned char dsize_log2; char tx_chn_name[128]; u32 rate_mbps; }; @@ -89,7 +99,10 @@ struct am65_cpsw_rx_chn { struct device *dma_dev; struct k3_cppi_desc_pool *desc_pool; struct k3_udma_glue_rx_channel *rx_chn; + struct page_pool *page_pool; + struct page **pages; u32 descs_num; + unsigned char dsize_log2; int irq; }; -- cgit v1.2.3 From 57b60ec4b30df188ca31bdd95971a6ef5dfc5094 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 11 Apr 2024 20:22:32 +0300 Subject: gpiolib: acpi: Pass con_id instead of property into acpi_dev_gpio_irq_get_by() Pass the con_id instead of property so that callers won't repeat the GPIO suffixes to try. 
Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-pca953x.c | 2 +- drivers/gpio/gpiolib-acpi.c | 11 +++++------ drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 2 +- drivers/pinctrl/pinctrl-cy8c95x0.c | 2 +- include/linux/acpi.h | 8 ++++---- 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 00ffa168e405..77a2812f2974 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -144,7 +144,7 @@ static int pca953x_acpi_get_irq(struct device *dev) if (ret) dev_warn(dev, "can't add GPIO ACPI mapping\n"); - ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0); + ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq", 0); if (ret < 0) return ret; diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 2b3fd43b13fc..909113312a1b 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1013,7 +1013,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, /** * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from - * @name: optional name of GpioInt resource + * @con_id: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * @wake_capable: Set to true if the IRQ is wake capable * @@ -1024,15 +1024,15 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. * - * The function takes optional @name parameter. If the resource has a property - * name, then only those will be taken into account. + * The function takes optional @con_id parameter. If the resource has + * a @con_id in a property, then only those will be taken into account. * * The GPIO is considered wake capable if the GpioInt resource specifies * SharedAndWake or ExclusiveAndWake. * * Return: Linux IRQ number (> %0) on success, negative errno on failure. */ -int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, +int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable) { int idx, i; @@ -1043,9 +1043,8 @@ int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, in struct acpi_gpio_info info; struct gpio_desc *desc; - desc = acpi_get_gpiod_by_index(adev, name, i, &info); - /* Ignore -EPROBE_DEFER, it only matters if idx matches */ + desc = __acpi_find_gpio(acpi_fwnode_handle(adev), con_id, i, true, &info); if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) return PTR_ERR(desc); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 3d09fa54598f..28a0016f1a55 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -428,7 +428,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->rx_irq = platform_get_irq(pdev, MLXBF_GIGE_RECEIVE_PKT_INTR_IDX); priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); - phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy-gpios", 0); + phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); if (phy_irq < 0) { dev_err(&pdev->dev, "Error getting PHY irq. 
Use polling instead"); phy_irq = PHY_POLL; diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 67b5d160c027..981c569bd671 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -95,7 +95,7 @@ static int cy8c95x0_acpi_get_irq(struct device *dev) if (ret) dev_warn(dev, "can't add GPIO ACPI mapping\n"); - ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0); + ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq", 0); if (ret < 0) return ret; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 34829f2c517a..35aff121f418 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1233,7 +1233,7 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, +int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, @@ -1246,7 +1246,7 @@ static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares, { return false; } -static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, +static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable) { return -ENXIO; @@ -1259,10 +1259,10 @@ static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable); } -static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *con_id, int index) { - return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL); + return acpi_dev_gpio_irq_wake_get_by(adev, con_id, index, NULL); } static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) -- cgit v1.2.3 From cc2a9d6c03b804c301447326aff4cf2359867f9c Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Thu, 4 Apr 2024 16:04:51 +0200 Subject: ice: Add automatic VF reset on Tx MDD events In cases when VF sends malformed packets that are classified as malicious, it can cause Tx queue to freeze as a result of Malicious Driver Detection event. Such malformed packets can appear as a result of a faulty userspace app running on VF. This frozen queue can be stuck for several minutes being unusable. User might prefer to immediately bring the VF back to operational state after such event, which can be done by automatically resetting the VF which caused MDD. This is already implemented for Rx events (mdd-auto-reset-vf flag private flag needs to be set). Extend the VF auto reset to also cover Tx MDD events. When any MDD event occurs on VF (Tx or Rx) and the mdd-auto-reset-vf private flag is set, perform a graceful VF reset to quickly bring it back to operational state. 
Reviewed-by: Wojciech Drewek Reviewed-by: Przemek Kitszel Co-developed-by: Liang-Min Wang Signed-off-by: Liang-Min Wang Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 57 ++++++++++++++++++++++++------ drivers/net/ethernet/intel/ice/ice_sriov.c | 25 +++++++++---- drivers/net/ethernet/intel/ice/ice_sriov.h | 2 ++ 3 files changed, 67 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9d751954782c..2b173638b877 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1745,6 +1745,39 @@ static void ice_service_timer(struct timer_list *t) ice_service_task_schedule(pf); } +/** + * ice_mdd_maybe_reset_vf - reset VF after MDD event + * @pf: pointer to the PF structure + * @vf: pointer to the VF structure + * @reset_vf_tx: whether Tx MDD has occurred + * @reset_vf_rx: whether Rx MDD has occurred + * + * Since the queue can get stuck on VF MDD events, the PF can be configured to + * automatically reset the VF by enabling the private ethtool flag + * mdd-auto-reset-vf. + */ +static void ice_mdd_maybe_reset_vf(struct ice_pf *pf, struct ice_vf *vf, + bool reset_vf_tx, bool reset_vf_rx) +{ + struct device *dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) + return; + + /* VF MDD event counters will be cleared by reset, so print the event + * prior to reset. + */ + if (reset_vf_tx) + ice_print_vf_tx_mdd_event(vf); + + if (reset_vf_rx) + ice_print_vf_rx_mdd_event(vf); + + dev_info(dev, "PF-to-VF reset on PF %d VF %d due to MDD event\n", + pf->hw.pf_id, vf->vf_id); + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK); +} + /** * ice_handle_mdd_event - handle malicious driver detect event * @pf: pointer to the PF structure @@ -1838,6 +1871,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) */ mutex_lock(&pf->vfs.table_lock); ice_for_each_vf(pf, bkt, vf) { + bool reset_vf_tx = false, reset_vf_rx = false; + reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF); @@ -1846,6 +1881,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id)); @@ -1856,6 +1893,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id)); @@ -1866,6 +1905,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_RX(vf->vf_id)); @@ -1877,18 +1918,12 @@ static void ice_handle_mdd_event(struct ice_pf *pf) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", vf->vf_id); - /* Since the queue is disabled on VF Rx MDD events, the - * PF can be configured to reset the VF through ethtool - * private flag mdd-auto-reset-vf. - */ - if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) { - /* VF MDD event counters will be cleared by - * reset, so print the event prior to reset. 
- */ - ice_print_vf_rx_mdd_event(vf); - ice_reset_vf(vf, ICE_VF_RESET_LOCK); - } + reset_vf_rx = true; } + + if (reset_vf_tx || reset_vf_rx) + ice_mdd_maybe_reset_vf(pf, vf, reset_vf_tx, + reset_vf_rx); } mutex_unlock(&pf->vfs.table_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index fb2e96db647e..a60dacf8942a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1861,6 +1861,24 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf) ? "on" : "off"); } +/** + * ice_print_vf_tx_mdd_event - print VF Tx malicious driver detect event + * @vf: pointer to the VF structure + */ +void ice_print_vf_tx_mdd_event(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct device *dev; + + dev = ice_pf_to_dev(pf); + + dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", + vf->mdd_tx_events.count, pf->hw.pf_id, vf->vf_id, + vf->dev_lan_addr, + test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) + ? "on" : "off"); +} + /** * ice_print_vfs_mdd_events - print VFs malicious driver detect event * @pf: pointer to the PF structure @@ -1869,8 +1887,6 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf) */ void ice_print_vfs_mdd_events(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; struct ice_vf *vf; unsigned int bkt; @@ -1897,10 +1913,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) { vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count; - - dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", - vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, - vf->dev_lan_addr); + ice_print_vf_tx_mdd_event(vf); } } mutex_unlock(&pf->vfs.table_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 4ba8fb53aea1..8f22313474d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -58,6 +58,7 @@ void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_print_vfs_mdd_events(struct ice_pf *pf); void ice_print_vf_rx_mdd_event(struct ice_vf *vf); +void ice_print_vf_tx_mdd_event(struct ice_vf *vf); bool ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto); u32 ice_sriov_get_vf_total_msix(struct pci_dev *pdev); @@ -69,6 +70,7 @@ static inline void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } +static inline void ice_print_vf_tx_mdd_event(struct ice_vf *vf) { } static inline void ice_restore_all_vfs_msi_state(struct ice_pf *pf) { } static inline int -- cgit v1.2.3 From 41355365d252e666e1c757f4252317625d03359c Mon Sep 17 00:00:00 2001 From: Dariusz Aftanski Date: Tue, 12 Mar 2024 13:26:35 -0700 Subject: ice: Remove ndo_get_phys_port_name ndo_get_phys_port_name is never actually used, as in switchdev devlink is always being created. 
Reviewed-by: Michal Swiatkowski Signed-off-by: Dariusz Aftanski Tested-by: Sujai Buvaneswaran Reviewed-by: Jiri Pirko Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_repr.c | 34 ------------------------------- 1 file changed, 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index b8009a301e39..d367f4c66dcd 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -9,39 +9,6 @@ #include "ice_tc_lib.h" #include "ice_dcb_lib.h" -/** - * ice_repr_get_sw_port_id - get port ID associated with representor - * @repr: pointer to port representor - */ -static int ice_repr_get_sw_port_id(struct ice_repr *repr) -{ - return repr->src_vsi->back->hw.port_info->lport; -} - -/** - * ice_repr_get_phys_port_name - get phys port name - * @netdev: pointer to port representor netdev - * @buf: write here port name - * @len: max length of buf - */ -static int -ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_repr *repr = np->repr; - int res; - - /* Devlink port is registered and devlink core is taking care of name formatting. */ - if (repr->vf->devlink_port.devlink) - return -EOPNOTSUPP; - - res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr), - repr->id); - if (res <= 0) - return -EOPNOTSUPP; - return 0; -} - /** * ice_repr_inc_tx_stats - increment Tx statistic by one packet * @repr: repr to increment stats on @@ -279,7 +246,6 @@ ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type, } static const struct net_device_ops ice_repr_netdev_ops = { - .ndo_get_phys_port_name = ice_repr_get_phys_port_name, .ndo_get_stats64 = ice_repr_get_stats64, .ndo_open = ice_repr_open, .ndo_stop = ice_repr_stop, -- cgit v1.2.3 From 5cb431dcf8048572e9ffc6c30cdbd8832cbe502d Mon Sep 17 00:00:00 2001 From: Nick Child Date: Tue, 16 Apr 2024 11:41:28 -0500 Subject: ibmvnic: Return error code on TX scrq flush fail In ibmvnic_xmit() if ibmvnic_tx_scrq_flush() returns H_CLOSED then it will inform upper level networking functions to disable tx queues. H_CLOSED signals that the connection with the vnic server is down and a transport event is expected to recover the device. Previously, ibmvnic_tx_scrq_flush() was hard-coded to return success. Therefore, the queues would remain active until ibmvnic_cleanup() is called within do_reset(). The problem is that do_reset() depends on the RTNL lock. If several ibmvnic devices are resetting then there can be a long wait time until the last device can grab the lock. During this time the tx/rx queues still appear active to upper level functions. FYI, we do make a call to netif_carrier_off() outside the RTNL lock but its calls to dev_deactivate() are also dependent on the RTNL lock. As a result, large amounts of retransmissions were observed in a short period of time, eventually leading to ETIMEOUT. This was specifically seen with HNV devices, likely because of even more RTNL dependencies. Therefore, ensure the return code of ibmvnic_tx_scrq_flush() is propagated to the xmit function to allow for an earlier (and lock-less) response to a transport event. 
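In code terms the change boils down to the caller no longer discarding the flush result; the pattern added in the hunks below is:

| lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq);
| if (lpar_rc != H_SUCCESS)
|         goto tx_err;    /* e.g. H_CLOSED: tell the stack to disable tx queues now */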
Signed-off-by: Nick Child Link: https://lore.kernel.org/r/20240416164128.387920-1-nnac123@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ibm/ibmvnic.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 30c47b8470ad..5e9a93bdb518 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2371,7 +2371,7 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); else ind_bufp->index = 0; - return 0; + return rc; } static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) @@ -2424,7 +2424,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; goto out; } @@ -2439,8 +2441,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; goto out; } -- cgit v1.2.3 From 4713744d9f6ee1f6dc5ac6f58c30f1b9f21067f8 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 17 Apr 2024 13:51:20 +0000 Subject: mlxsw: spectrum_flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Tested-by: Petr Machata Link: https://lore.kernel.org/r/20240417135131.99921-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 9fd1ca079258..f07955b5439f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -595,6 +595,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { -- cgit v1.2.3 From bb534830a74f54037df923e15254e8c885dce35a Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 17 Apr 2024 14:07:10 +0000 Subject: sfc: use flow_rule_is_supp_control_flags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the check for unsupported control flags, to use the new helper flow_rule_is_supp_control_flags(). Since the helper was based on sfc, then nothing really changes. Compile-tested, and compiled objects are identical. 
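For readers new to the helpers used in these two patches: flow_rule_has_control_flags() and flow_rule_is_supp_control_flags() compare the control-flags mask supplied by tc with the set of flags the driver claims to support, and set an extack message on mismatch. Condensed from the hunks below, the two call styles are:

| /* driver supports no control flags at all */
| if (flow_rule_has_control_flags(match.mask->flags, extack))
|         return -EOPNOTSUPP;
|
| /* driver supports only a specific subset, here fragment matching */
| if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT |
|                                      FLOW_DIS_FIRST_FRAG,
|                                      fm.mask->flags, extack))
|         return -EOPNOTSUPP;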
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20240417140712.100905-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 82e8891a619a..9d140203e273 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -273,11 +273,10 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG; match->mask.ip_firstfrag = true; } - if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) { - NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x", - fm.mask->flags); + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT | + FLOW_DIS_FIRST_FRAG, + fm.mask->flags, extack)) return -EOPNOTSUPP; - } } if (dissector->used_keys & ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | -- cgit v1.2.3 From 435f9fcc0e44b419cfd7d5742ca6f0be62d10dbb Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 17 Apr 2024 14:43:58 +0000 Subject: net: mscc: ocelot: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Link: https://lore.kernel.org/r/20240417144359.104225-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_flower.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 33b438c6aec5..a057ec3dab97 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -609,11 +609,8 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return ret; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_control(rule, &match); - } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; -- cgit v1.2.3 From fdf412374379bed2532b7180675ee177310be5db Mon Sep 17 00:00:00 2001 From: Ziwei Xiao Date: Wed, 17 Apr 2024 20:57:57 +0000 Subject: gve: Remove qpl_cfg struct since qpl_ids map with queues respectively The qpl_cfg struct was used to make sure that no two different queues are using QPL with the same qpl_id. We can remove that qpl_cfg struct since now the qpl_ids map with the queues respectively as follows: For tx queues: qpl_id = tx_qid For rx queues: qpl_id = max_tx_queues + rx_qid And when XDP is used, it will need the user to reduce the tx queues to be at most half of the max_tx_queues. Then it will use the same number of tx queues starting from the end of existing tx queues for XDP. So the XDP queues will not exceed the max_tx_queues range and will not overlap with the rx queues, where the qpl_ids will not have overlapping too. 
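Put differently, each ring now looks up its QPL directly from its queue id; the helpers used in the hunks below encode exactly the mapping above:

| u32 tx_qpl_id = gve_tx_qpl_id(priv, tx->q_num);              /* == tx_qid                  */
| u32 rx_qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);  /* == max_tx_queues + rx_qid  */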
Considering of that, we remove the qpl_cfg struct to get the qpl_id directly based on the queue id. Unless we are erroneously allocating a rx/tx queue that has already been allocated, we would never allocate the qpl with the same qpl_id twice. In that case, it should fail much earlier than the QPL assignment. Suggested-by: Praveen Kaligineedi Signed-off-by: Ziwei Xiao Reviewed-by: Harshitha Ramamurthy Reviewed-by: Shailend Chand Link: https://lore.kernel.org/r/20240417205757.778551-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 39 ++------------------------- drivers/net/ethernet/google/gve/gve_ethtool.c | 9 ------- drivers/net/ethernet/google/gve/gve_main.c | 38 +------------------------- drivers/net/ethernet/google/gve/gve_rx.c | 12 +++------ drivers/net/ethernet/google/gve/gve_rx_dqo.c | 12 ++++----- drivers/net/ethernet/google/gve/gve_tx.c | 12 +++------ drivers/net/ethernet/google/gve/gve_tx_dqo.c | 11 +++----- 7 files changed, 20 insertions(+), 113 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index e97633b68e25..53b5244dc7bc 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -639,7 +639,6 @@ struct gve_ptype_lut { /* Parameters for allocating queue page lists */ struct gve_qpls_alloc_cfg { - struct gve_qpl_config *qpl_cfg; struct gve_queue_config *tx_cfg; struct gve_queue_config *rx_cfg; @@ -655,9 +654,8 @@ struct gve_qpls_alloc_cfg { struct gve_tx_alloc_rings_cfg { struct gve_queue_config *qcfg; - /* qpls and qpl_cfg must already be allocated */ + /* qpls must already be allocated */ struct gve_queue_page_list *qpls; - struct gve_qpl_config *qpl_cfg; u16 ring_size; u16 start_idx; @@ -674,9 +672,8 @@ struct gve_rx_alloc_rings_cfg { struct gve_queue_config *qcfg; struct gve_queue_config *qcfg_tx; - /* qpls and qpl_cfg must already be allocated */ + /* qpls must already be allocated */ struct gve_queue_page_list *qpls; - struct gve_qpl_config *qpl_cfg; u16 ring_size; u16 packet_buffer_size; @@ -732,7 +729,6 @@ struct gve_priv { u16 num_xdp_queues; struct gve_queue_config tx_cfg; struct gve_queue_config rx_cfg; - struct gve_qpl_config qpl_cfg; /* map used QPL ids */ u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ @@ -1053,37 +1049,6 @@ static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) return 2 * rx_desc_cnt; } -/* Returns a pointer to the next available tx qpl in the list of qpls */ -static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_tx_alloc_rings_cfg *cfg, - int tx_qid) -{ - /* QPL already in use */ - if (test_bit(tx_qid, cfg->qpl_cfg->qpl_id_map)) - return NULL; - set_bit(tx_qid, cfg->qpl_cfg->qpl_id_map); - return &cfg->qpls[tx_qid]; -} - -/* Returns a pointer to the next available rx qpl in the list of qpls */ -static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_rx_alloc_rings_cfg *cfg, - int rx_qid) -{ - int id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx_qid); - /* QPL already in use */ - if (test_bit(id, cfg->qpl_cfg->qpl_id_map)) - return NULL; - set_bit(id, cfg->qpl_cfg->qpl_id_map); - return &cfg->qpls[id]; -} - -/* Unassigns the qpl with the given id */ -static inline void gve_unassign_qpl(struct gve_qpl_config *qpl_cfg, int id) -{ - clear_bit(id, qpl_cfg->qpl_id_map); -} - /* Returns the correct dma direction for tx and rx qpls */ static inline enum 
dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 299206d15c73..bd7632eed776 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -510,7 +510,6 @@ static int gve_adjust_ring_sizes(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; - struct gve_qpl_config new_qpl_cfg; int err; /* get current queue configuration */ @@ -521,14 +520,6 @@ static int gve_adjust_ring_sizes(struct gve_priv *priv, tx_alloc_cfg.ring_size = new_tx_desc_cnt; rx_alloc_cfg.ring_size = new_rx_desc_cnt; - /* qpl_cfg is not read-only, it contains a map that gets updated as - * rings are allocated, which is why we cannot use the yet unreleased - * one in priv. - */ - qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; - tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &qpls_alloc_cfg, &tx_alloc_cfg, &rx_alloc_cfg); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index a515e5af843c..61039e3dd2bb 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -829,7 +829,6 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->qcfg = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->qpls = priv->qpls; - cfg->qpl_cfg = &priv->qpl_cfg; cfg->ring_size = priv->tx_desc_cnt; cfg->start_idx = 0; cfg->num_rings = gve_num_tx_queues(priv); @@ -1119,22 +1118,13 @@ static int gve_alloc_qpls(struct gve_priv *priv, struct gve_qpls_alloc_cfg *cfg, if (!qpls) return -ENOMEM; - cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) * - sizeof(unsigned long) * BITS_PER_BYTE; - cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), - sizeof(unsigned long), GFP_KERNEL); - if (!cfg->qpl_cfg->qpl_id_map) { - err = -ENOMEM; - goto free_qpl_array; - } - /* Allocate TX QPLs */ page_count = priv->tx_pages_per_qpl; tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues, gve_is_qpl(priv)); err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls); if (err) - goto free_qpl_map; + goto free_qpl_array; /* Allocate RX QPLs */ rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg); @@ -1157,9 +1147,6 @@ static int gve_alloc_qpls(struct gve_priv *priv, struct gve_qpls_alloc_cfg *cfg, free_tx_qpls: gve_free_n_qpls(priv, qpls, 0, tx_num_qpls); -free_qpl_map: - kvfree(cfg->qpl_cfg->qpl_id_map); - cfg->qpl_cfg->qpl_id_map = NULL; free_qpl_array: kvfree(qpls); return err; @@ -1175,9 +1162,6 @@ static void gve_free_qpls(struct gve_priv *priv, if (!qpls) return; - kvfree(cfg->qpl_cfg->qpl_id_map); - cfg->qpl_cfg->qpl_id_map = NULL; - for (i = 0; i < max_queues; i++) gve_free_queue_page_list(priv, &qpls[i], i); @@ -1292,7 +1276,6 @@ static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv, cfg->raw_addressing = !gve_is_qpl(priv); cfg->is_gqi = gve_is_gqi(priv); cfg->num_xdp_queues = priv->num_xdp_queues; - cfg->qpl_cfg = &priv->qpl_cfg; cfg->tx_cfg = &priv->tx_cfg; cfg->rx_cfg = &priv->rx_cfg; cfg->qpls = priv->qpls; @@ -1306,7 +1289,6 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->raw_addressing = !gve_is_qpl(priv); cfg->enable_header_split = priv->header_split_enabled; cfg->qpls = priv->qpls; - cfg->qpl_cfg = &priv->qpl_cfg; 
cfg->ring_size = priv->rx_desc_cnt; cfg->packet_buffer_size = gve_is_gqi(priv) ? GVE_DEFAULT_RX_BUFFER_SIZE : @@ -1419,7 +1401,6 @@ static int gve_queues_start(struct gve_priv *priv, priv->rx = rx_alloc_cfg->rx; /* Record new configs into priv */ - priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg; priv->tx_cfg = *tx_alloc_cfg->qcfg; priv->rx_cfg = *rx_alloc_cfg->qcfg; priv->tx_desc_cnt = tx_alloc_cfg->ring_size; @@ -1916,20 +1897,11 @@ int gve_adjust_queues(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; - struct gve_qpl_config new_qpl_cfg; int err; gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, &tx_alloc_cfg, &rx_alloc_cfg); - /* qpl_cfg is not read-only, it contains a map that gets updated as - * rings are allocated, which is why we cannot use the yet unreleased - * one in priv. - */ - qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; - tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - /* Relay the new config from ethtool */ qpls_alloc_cfg.tx_cfg = &new_tx_config; tx_alloc_cfg.qcfg = &new_tx_config; @@ -2121,18 +2093,10 @@ static int gve_set_features(struct net_device *netdev, struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); - struct gve_qpl_config new_qpl_cfg; int err; gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, &tx_alloc_cfg, &rx_alloc_cfg); - /* qpl_cfg is not read-only, it contains a map that gets updated as - * rings are allocated, which is why we cannot use the yet unreleased - * one in priv. - */ - qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; - tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index cd727e55ae0f..9b56e89c4f43 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -38,7 +38,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, for (i = 0; i < slots; i++) page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id); rx->data.qpl = NULL; for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { @@ -145,13 +144,11 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, return -ENOMEM; if (!rx->data.raw_addressing) { - rx->data.qpl = gve_assign_rx_qpl(cfg, rx->q_num); - if (!rx->data.qpl) { - kvfree(rx->data.page_info); - rx->data.page_info = NULL; - return -ENOMEM; - } + u32 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + + rx->data.qpl = &cfg->qpls[qpl_id]; } + for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; @@ -204,7 +201,6 @@ alloc_err_qpl: page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id); rx->data.qpl = NULL; return err; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 15108407b54f..53fd2d87233f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -247,10 +247,8 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, if (bs->page_info.page) gve_free_page_dqo(priv, bs, !rx->dqo.qpl); } - if (rx->dqo.qpl) { - 
gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id); - rx->dqo.qpl = NULL; - } + + rx->dqo.qpl = NULL; if (rx->dqo.bufq.desc_ring) { size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots; @@ -359,9 +357,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (!cfg->raw_addressing) { - rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num); - if (!rx->dqo.qpl) - goto err; + u32 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + + rx->dqo.qpl = &cfg->qpls[qpl_id]; rx->dqo.next_qpl_page_idx = 0; } diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4b9853adc113..f805700d67e7 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -225,7 +225,6 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, if (!tx->raw_addressing) { gve_tx_fifo_release(priv, &tx->tx_fifo); - gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id); tx->tx_fifo.qpl = NULL; } @@ -280,12 +279,12 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, tx->raw_addressing = cfg->raw_addressing; tx->dev = hdev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(cfg, idx); - if (!tx->tx_fifo.qpl) - goto abort_with_desc; + u32 qpl_id = gve_tx_qpl_id(priv, tx->q_num); + + tx->tx_fifo.qpl = &cfg->qpls[qpl_id]; /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) - goto abort_with_qpl; + goto abort_with_desc; } tx->q_resources = @@ -301,9 +300,6 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); -abort_with_qpl: - if (!tx->raw_addressing) - gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id); abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 70f29b90a982..3d825e406c4b 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -236,10 +236,7 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, kvfree(tx->dqo.tx_qpl_buf_next); tx->dqo.tx_qpl_buf_next = NULL; - if (tx->dqo.qpl) { - gve_unassign_qpl(cfg->qpl_cfg, tx->dqo.qpl->id); - tx->dqo.qpl = NULL; - } + tx->dqo.qpl = NULL; netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); } @@ -352,9 +349,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (!cfg->raw_addressing) { - tx->dqo.qpl = gve_assign_tx_qpl(cfg, idx); - if (!tx->dqo.qpl) - goto err; + u32 qpl_id = gve_tx_qpl_id(priv, tx->q_num); + + tx->dqo.qpl = &cfg->qpls[qpl_id]; if (gve_tx_qpl_buf_init(tx)) goto err; -- cgit v1.2.3 From 80b7aae9e3b8fc3f1678be61ccd81d5b6e6bd6be Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Thu, 18 Apr 2024 17:34:55 +0200 Subject: net: ethernet: ti: am65-cpsw: Fix xdp_rxq error for disabled port When an ethX port is disabled in the device tree, an error is returned by xdp_rxq_info_reg() function while transitioning the CPSW device to the up state. The message 'Missing net_device from driver' is output. This patch fixes the issue by registering xdp_rxq info only if ethX port is enabled (i.e. ndev pointer is not NULL). 
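Concretely, both the xdp_rxq register and unregister loops gain the same early-continue guard (shown in the hunk below):

| for (i = 0; i < common->port_num; i++) {
|         if (!common->ports[i].ndev)
|                 continue;       /* port disabled in DT: no netdev, skip its xdp_rxq */
|         ...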
Fixes: 8acacc40f733 ("net: ethernet: ti: am65-cpsw: Add minimal XDP support") Link: https://lore.kernel.org/all/260d258f-87a1-4aac-8883-aab4746b32d8@ti.com/ Reported-by: Siddharth Vadapalli Closes: https://gist.github.com/Siddharth-Vadapalli-at-TI/5ed0e436606001c247a7da664f75edee Signed-off-by: Julien Panis Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 259d7cb13f6e..a7dbbbe3ff5d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -391,6 +391,9 @@ static void am65_cpsw_destroy_xdp_rxqs(struct am65_cpsw_common *common) int i; for (i = 0; i < common->port_num; i++) { + if (!common->ports[i].ndev) + continue; + rxq = &common->ports[i].xdp_rxq; if (xdp_rxq_info_is_reg(rxq)) @@ -426,6 +429,9 @@ static int am65_cpsw_create_xdp_rxqs(struct am65_cpsw_common *common) rx_chn->page_pool = pool; for (i = 0; i < common->port_num; i++) { + if (!common->ports[i].ndev) + continue; + rxq = &common->ports[i].xdp_rxq; ret = xdp_rxq_info_reg(rxq, common->ports[i].ndev, i, 0); -- cgit v1.2.3 From 6a57f091622a1251c2826f7380577049199b80ea Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sat, 20 Apr 2024 15:05:05 +0530 Subject: octeontx2-pf: Add support for offload tc with skbedit mark action Support offloading of skbedit mark action. For example, to mark with 0x0008, with dest ip 60.60.60.2 on eth2 interface: # tc qdisc add dev eth2 ingress # tc filter add dev eth2 ingress protocol ip flower \ dst_ip 60.60.60.2 action skbedit mark 0x0008 skip_sw Signed-off-by: Geetha sowjanya Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 2 ++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 13 +++++++++++++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 3 +++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 3 +++ 6 files changed, 24 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index c181e7aa9eb6..150635de2bd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1187,6 +1187,8 @@ static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, action.pf_func = target; action.op = NIX_RX_ACTIONOP_UCAST; } + if (req->match_id) + action.match_id = req->match_id; } entry->action = *(u64 *)&action; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index c5de3ba33e2f..24fbbef265a6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -363,6 +363,7 @@ struct otx2_flow_config { struct list_head flow_list; u32 dmacflt_max_flows; u16 max_flows; + refcount_t mark_flows; struct list_head flow_list_tc; bool ntuple; }; @@ -465,6 +466,7 @@ struct otx2_nic { #define OTX2_FLAG_DMACFLTR_SUPPORT BIT_ULL(14) #define OTX2_FLAG_PTP_ONESTEP_SYNC BIT_ULL(15) #define OTX2_FLAG_ADPTV_INT_COAL_ENABLED BIT_ULL(16) +#define OTX2_FLAG_TC_MARK_ENABLED BIT_ULL(17) u64 flags; u64 *cq_op_addr; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 97a71e9b8563..bc5819237ed7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -252,6 +252,7 @@ static int otx2_mcam_entry_init(struct otx2_nic *pfvf) pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; + refcount_set(&flow_cfg->mark_flows, 1); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index f4655a8c0705..6d4ce2ece8d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -511,7 +511,15 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, nr_police++; break; case FLOW_ACTION_MARK: + if (act->mark & ~OTX2_RX_MATCH_ID_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Bad flow mark, only 16 bit supported"); + return -EOPNOTSUPP; + } mark = act->mark; + req->match_id = mark & OTX2_RX_MATCH_ID_MASK; + req->op = NIX_RX_ACTION_DEFAULT; + nic->flags |= OTX2_FLAG_TC_MARK_ENABLED; + refcount_inc(&nic->flow_cfg->mark_flows); break; case FLOW_ACTION_RX_QUEUE_MAPPING: @@ -1187,6 +1195,11 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, return -EINVAL; } + /* Disable TC MARK flag if they are no rules with skbedit mark action */ + if (flow_node->req.match_id) + if (!refcount_dec_and_test(&flow_cfg->mark_flows)) + nic->flags &= ~OTX2_FLAG_TC_MARK_ENABLED; + if (flow_node->is_act_police) { __clear_bit(flow_node->rq, &nic->rq_bmap); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index f828d32737af..a16e9f244117 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -380,6 +380,9 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, if (pfvf->netdev->features & NETIF_F_RXCSUM) skb->ip_summed = CHECKSUM_UNNECESSARY; + if (pfvf->flags & OTX2_FLAG_TC_MARK_ENABLED) + skb->mark = parse->match_id; + skb_mark_for_recycle(skb); napi_gro_frags(napi); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index a82ffca8ce1b..3f1d2655ff77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -62,6 +62,9 @@ #define CQ_OP_STAT_OP_ERR 63 #define CQ_OP_STAT_CQ_ERR 46 +/* Packet mark mask */ +#define OTX2_RX_MATCH_ID_MASK 0x0000ffff + struct queue_stats { u64 bytes; u64 pkts; -- cgit v1.2.3 From 5625ca5640caa3fb797f155601d56379d260d6ba Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Fri, 19 Apr 2024 04:08:49 -0400 Subject: devlink: extend devlink_param *set pointer Extend devlink_param *set function pointer to take extack as a param. Sometimes it is needed to pass information to the end user from set function. It is more proper to use for that netlink instead of passing message to dmesg. Reviewed-by: Jiri Pirko Reviewed-by: Przemek Kitszel Signed-off-by: Mateusz Polchlopek Signed-off-by: Tony Nguyen --- drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c | 9 ++++++--- drivers/net/ethernet/amd/pds_core/core.h | 3 ++- drivers/net/ethernet/amd/pds_core/devlink.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 6 ++++-- drivers/net/ethernet/intel/ice/devlink/devlink.c | 12 ++++++------ drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 12 ++++++++---- drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/main.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 3 ++- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 3 ++- drivers/net/ethernet/netronome/nfp/devlink_param.c | 3 ++- drivers/net/ethernet/qlogic/qed/qed_devlink.c | 3 ++- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- drivers/net/ethernet/ti/cpsw_new.c | 6 ++++-- drivers/net/wwan/iosm/iosm_ipc_devlink.c | 3 ++- include/net/devlink.h | 3 ++- include/net/dsa.h | 3 ++- net/devlink/param.c | 7 ++++--- net/dsa/devlink.c | 3 ++- 22 files changed, 66 insertions(+), 37 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c index d2b8d26db968..215a1a8ba7e9 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -4,7 +4,8 @@ #include "otx2_cpt_devlink.h" static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; @@ -13,7 +14,8 @@ static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id, } static int otx2_cpt_dl_egrp_delete(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink 
*cpt_dl = devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; @@ -45,7 +47,8 @@ static int otx2_cpt_dl_t106_mode_get(struct devlink *dl, u32 id, } static int otx2_cpt_dl_t106_mode_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index a3e17a0c187a..14522d6d5f86 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -256,7 +256,8 @@ int pdsc_dl_flash_update(struct devlink *dl, int pdsc_dl_enable_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx); int pdsc_dl_enable_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int pdsc_dl_enable_validate(struct devlink *dl, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 54864f27c87a..2681889162a2 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -37,7 +37,8 @@ int pdsc_dl_enable_get(struct devlink *dl, u32 id, } int pdsc_dl_enable_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct pdsc *pdsc = devlink_priv(dl); struct pdsc_viftype *vt_entry; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index ae4529c043f0..d9ea6fa23923 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -1096,7 +1096,8 @@ static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id, } static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(dl); struct hwrm_nvm_set_variable_input *req; @@ -1145,7 +1146,8 @@ static int bnxt_remote_dev_reset_get(struct devlink *dl, u32 id, } static int bnxt_remote_dev_reset_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(dl); int rc; diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index be9244bb8bbc..acbace240977 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1144,9 +1144,9 @@ ice_devlink_enable_roce_get(struct devlink *devlink, u32 id, return 0; } -static int -ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) +static int ice_devlink_enable_roce_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); bool roce_ena = ctx->val.vbool; @@ -1195,9 +1195,9 @@ ice_devlink_enable_iw_get(struct devlink *devlink, u32 id, return 0; } -static int -ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) +static int ice_devlink_enable_iw_set(struct devlink *devlink, u32 id, + struct 
devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); bool iw_ena = ctx->val.vbool; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 96c04f7d93f8..7498ab429963 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1202,7 +1202,8 @@ static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id, } static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); struct rvu *rvu = rvu_dl->rvu; @@ -1256,7 +1257,8 @@ static int rvu_af_npc_exact_feature_get(struct devlink *devlink, u32 id, } static int rvu_af_npc_exact_feature_disable(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); struct rvu *rvu = rvu_dl->rvu; @@ -1310,7 +1312,8 @@ static int rvu_af_dl_npc_mcam_high_zone_percent_get(struct devlink *devlink, u32 } static int rvu_af_dl_npc_mcam_high_zone_percent_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); struct rvu *rvu = rvu_dl->rvu; @@ -1367,7 +1370,8 @@ static int rvu_af_dl_nix_maxlf_get(struct devlink *devlink, u32 id, } static int rvu_af_dl_nix_maxlf_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); struct rvu *rvu = rvu_dl->rvu; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 4e1130496573..99ddf31269d9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -32,7 +32,8 @@ static int otx2_dl_mcam_count_validate(struct devlink *devlink, u32 id, } static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_devlink *otx2_dl = devlink_priv(devlink); struct otx2_nic *pfvf = otx2_dl->pfvf; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7b02ff61126d..98688e4dbec5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -185,7 +185,8 @@ static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id, } static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { mlx4_internal_err_reset = ctx->val.vbool; return 0; @@ -202,7 +203,8 @@ static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id, } static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlx4_priv *priv = devlink_priv(devlink); struct mlx4_dev *dev = &priv->dev; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1789800faaeb..17f78091ad30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1805,7 +1805,8 @@ err: } static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index dfcc7e85e0a3..f378b8176e47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2413,7 +2413,8 @@ err: } static int esw_port_metadata_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cf085a478e3e..32cdacc34a0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3332,7 +3332,8 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, } static int mlx5_fs_mode_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); enum mlx5_flow_steering_mode mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 2911aa34a5be..979c49ae6b5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -52,7 +52,8 @@ static void mlx5_set_fw_rst_ack(struct mlx5_core_dev *dev) } static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_fw_reset *fw_reset; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index f20052776b3f..baedf0d45e85 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1465,7 +1465,8 @@ mlxsw_sp_acl_tcam_region_rehash_intrvl_get(struct devlink *devlink, u32 id, static int mlxsw_sp_acl_tcam_region_rehash_intrvl_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); struct mlxsw_sp_acl_tcam_vregion *vregion; diff --git a/drivers/net/ethernet/netronome/nfp/devlink_param.c b/drivers/net/ethernet/netronome/nfp/devlink_param.c index a655f9e69a7b..0e1a3800f371 100644 --- a/drivers/net/ethernet/netronome/nfp/devlink_param.c +++ b/drivers/net/ethernet/netronome/nfp/devlink_param.c @@ -132,7 +132,8 @@ exit_close_nsp: static int nfp_devlink_param_u8_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { 
const struct nfp_devlink_param_u8_arg *arg; struct nfp_pf *pf = devlink_priv(devlink); diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index dad8e617c393..1adc7fbb3f2f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -132,7 +132,8 @@ static int qed_dl_param_get(struct devlink *dl, u32 id, } static int qed_dl_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct qed_devlink *qed_dl = devlink_priv(dl); struct qed_dev *cdev; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index a7dbbbe3ff5d..bf3df86554d1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -3061,7 +3061,8 @@ static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port) } static int am65_cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct am65_cpsw_devlink *dl_priv = devlink_priv(dl); struct am65_cpsw_common *cpsw = dl_priv->common; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 087dcb67505a..2baa198ebfa0 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1625,7 +1625,8 @@ static int cpsw_dl_switch_mode_get(struct devlink *dl, u32 id, } static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct cpsw_devlink *dl_priv = devlink_priv(dl); struct cpsw_common *cpsw = dl_priv->cpsw; @@ -1762,7 +1763,8 @@ static int cpsw_dl_ale_ctrl_get(struct devlink *dl, u32 id, } static int cpsw_dl_ale_ctrl_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct cpsw_devlink *dl_priv = devlink_priv(dl); struct cpsw_common *cpsw = dl_priv->cpsw; diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c index 2fe724d623c0..bef6819986e9 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c +++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c @@ -33,7 +33,8 @@ static int ipc_devlink_get_param(struct devlink *dl, u32 id, /* Set the param values for the specific param ID's */ static int ipc_devlink_set_param(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct iosm_devlink *ipc_devlink = devlink_priv(dl); diff --git a/include/net/devlink.h b/include/net/devlink.h index d31769a116ce..35eb0f884386 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -483,7 +483,8 @@ struct devlink_param { int (*get)(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx); int (*set)(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int (*validate)(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack); diff --git a/include/net/dsa.h b/include/net/dsa.h index 7edfd8de8882..a6ef7e4c503f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1258,7 +1258,8 @@ struct dsa_switch_ops { int dsa_devlink_param_get(struct devlink *dl, u32 id, struct 
devlink_param_gset_ctx *ctx); int dsa_devlink_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int dsa_devlink_params_register(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); diff --git a/net/devlink/param.c b/net/devlink/param.c index 22bc3b500518..dcf0d1ccebba 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -158,11 +158,12 @@ static int devlink_param_get(struct devlink *devlink, static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { if (!param->set) return -EOPNOTSUPP; - return param->set(devlink, param->id, ctx); + return param->set(devlink, param->id, ctx, extack); } static int @@ -571,7 +572,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return -EOPNOTSUPP; ctx.val = value; ctx.cmode = cmode; - err = devlink_param_set(devlink, param, &ctx); + err = devlink_param_set(devlink, param, &ctx, info->extack); if (err) return err; } diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c index 431bf52290a1..0aac887d0098 100644 --- a/net/dsa/devlink.c +++ b/net/dsa/devlink.c @@ -194,7 +194,8 @@ int dsa_devlink_param_get(struct devlink *dl, u32 id, EXPORT_SYMBOL_GPL(dsa_devlink_param_get); int dsa_devlink_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dsa_devlink_to_ds(dl); -- cgit v1.2.3 From 91427e6d9030611b27a4150ad3c59e4db3f02b8c Mon Sep 17 00:00:00 2001 From: Raj Victor Date: Fri, 19 Apr 2024 04:08:50 -0400 Subject: ice: Support 5 layer topology There is a performance issue when the number of VSIs are not multiple of 8. This is caused due to the max children limitation per node(8) in 9 layer topology. The BW credits are shared evenly among the children by default. Assume one node has 8 children and the other has 1. The parent of these nodes share the BW credit equally among them. Apparently this causes a problem for the first node which has 8 children. The 9th VM get more BW credits than the first 8 VMs. Example: 1) With 8 VM's: for x in 0 1 2 3 4 5 6 7; do taskset -c ${x} netperf -P0 -H 172.68.169.125 & sleep .1 ; done tx_queue_0_packets: 23283027 tx_queue_1_packets: 23292289 tx_queue_2_packets: 23276136 tx_queue_3_packets: 23279828 tx_queue_4_packets: 23279828 tx_queue_5_packets: 23279333 tx_queue_6_packets: 23277745 tx_queue_7_packets: 23279950 tx_queue_8_packets: 0 2) With 9 VM's: for x in 0 1 2 3 4 5 6 7 8; do taskset -c ${x} netperf -P0 -H 172.68.169.125 & sleep .1 ; done tx_queue_0_packets: 24163396 tx_queue_1_packets: 24164623 tx_queue_2_packets: 24163188 tx_queue_3_packets: 24163701 tx_queue_4_packets: 24163683 tx_queue_5_packets: 24164668 tx_queue_6_packets: 23327200 tx_queue_7_packets: 24163853 tx_queue_8_packets: 91101417 So on average queue 8 statistics show that 3.7 times more packets were send there than to the other queues. The FW starting with version 3.20, has increased the max number of children per node by reducing the number of layers from 9 to 5. Reflect this on driver side. 
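As a rough back-of-the-envelope check (assuming the strictly even per-child credit split described above, not a measured limit): the parent splits its credits 50/50 between the node with 8 children and the node with 1, so the lone 9th VM is entitled to 1/2 of the bandwidth while each of the first 8 VMs gets 1/2 * 1/8 = 1/16, i.e. up to an 8x imbalance in the limit. The ~3.7x skew observed on queue 8 above is consistent with this effect.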
Signed-off-by: Raj Victor Co-developed-by: Michal Wilczynski Signed-off-by: Michal Wilczynski Co-developed-by: Mateusz Polchlopek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 23 +++ drivers/net/ethernet/intel/ice/ice_common.c | 5 + drivers/net/ethernet/intel/ice/ice_ddp.c | 209 ++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_ddp.h | 2 + drivers/net/ethernet/intel/ice/ice_sched.h | 11 ++ drivers/net/ethernet/intel/ice/ice_type.h | 1 + 6 files changed, 251 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 540c0bdca936..0487c425ae24 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -121,6 +121,7 @@ struct ice_aqc_list_caps_elem { #define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE 0x0076 #define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077 #define ICE_AQC_CAPS_NVM_MGMT 0x0080 +#define ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define ICE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 #define ICE_AQC_BIT_ROCEV2_LAG 0x01 #define ICE_AQC_BIT_SRIOV_LAG 0x02 @@ -810,6 +811,23 @@ struct ice_aqc_get_topo { __le32 addr_low; }; +/* Get/Set Tx Topology (indirect 0x0418/0x0417) */ +struct ice_aqc_get_set_tx_topo { + u8 set_flags; +#define ICE_AQC_TX_TOPO_FLAGS_CORRER BIT(0) +#define ICE_AQC_TX_TOPO_FLAGS_SRC_RAM BIT(1) +#define ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW BIT(4) +#define ICE_AQC_TX_TOPO_FLAGS_ISSUED BIT(5) + + u8 get_flags; +#define ICE_AQC_TX_TOPO_GET_RAM 2 + + __le16 reserved1; + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + /* Update TSE (indirect 0x0403) * Get TSE (indirect 0x0404) * Add TSE (indirect 0x0401) @@ -2538,6 +2556,7 @@ struct ice_aq_desc { struct ice_aqc_get_link_topo get_link_topo; struct ice_aqc_i2c read_write_i2c; struct ice_aqc_read_i2c_resp read_i2c_resp; + struct ice_aqc_get_set_tx_topo get_set_tx_topo; } params; }; @@ -2644,6 +2663,10 @@ enum ice_adminq_opc { ice_aqc_opc_query_sched_res = 0x0412, ice_aqc_opc_remove_rl_profiles = 0x0415, + /* tx topology commands */ + ice_aqc_opc_set_tx_topo = 0x0417, + ice_aqc_opc_get_tx_topo = 0x0418, + /* PHY commands */ ice_aqc_opc_get_phy_caps = 0x0600, ice_aqc_opc_set_phy_cfg = 0x0601, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index f301000aa8a6..f4bc8723ffa9 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1617,6 +1617,8 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, case ice_aqc_opc_set_port_params: case ice_aqc_opc_get_vlan_mode_parameters: case ice_aqc_opc_set_vlan_mode_parameters: + case ice_aqc_opc_set_tx_topo: + case ice_aqc_opc_get_tx_topo: case ice_aqc_opc_add_recipe: case ice_aqc_opc_recipe_to_profile: case ice_aqc_opc_get_recipe: @@ -2173,6 +2175,9 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n", prefix, caps->sriov_lag); break; + case ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE: + caps->tx_sched_topo_comp_mode_en = (number == 1); + break; default: /* Not one of the recognized common capabilities */ found = false; diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 2ffa11fa2df1..87d86d8897ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ 
b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -4,6 +4,7 @@ #include "ice_common.h" #include "ice.h" #include "ice_ddp.h" +#include "ice_sched.h" /* For supporting double VLAN mode, it is necessary to enable or disable certain * boost tcam entries. The metadata labels names that match the following @@ -2272,3 +2273,211 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, return state; } + +/** + * ice_get_set_tx_topo - get or set Tx topology + * @hw: pointer to the HW struct + * @buf: pointer to Tx topology buffer + * @buf_size: buffer size + * @cd: pointer to command details structure or NULL + * @flags: pointer to descriptor flags + * @set: 0-get, 1-set topology + * + * The function will get or set Tx topology + * + * Return: zero when set was successful, negative values otherwise. + */ +static int +ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, + struct ice_sq_cd *cd, u8 *flags, bool set) +{ + struct ice_aqc_get_set_tx_topo *cmd; + struct ice_aq_desc desc; + int status; + + cmd = &desc.params.get_set_tx_topo; + if (set) { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_tx_topo); + cmd->set_flags = ICE_AQC_TX_TOPO_FLAGS_ISSUED; + /* requested to update a new topology, not a default topology */ + if (buf) + cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | + ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; + + if (ice_is_e825c(hw)) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } else { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); + cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; + } + + if (!ice_is_e825c(hw)) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (status) + return status; + /* read the return flag values (first byte) for get operation */ + if (!set && flags) + *flags = desc.params.get_set_tx_topo.set_flags; + + return 0; +} + +/** + * ice_cfg_tx_topo - Initialize new Tx topology if available + * @hw: pointer to the HW struct + * @buf: pointer to Tx topology buffer + * @len: buffer size + * + * The function will apply the new Tx topology from the package buffer + * if available. + * + * Return: zero when update was successful, negative values otherwise. + */ +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len) +{ + u8 *current_topo, *new_topo = NULL; + struct ice_run_time_cfg_seg *seg; + struct ice_buf_hdr *section; + struct ice_pkg_hdr *pkg_hdr; + enum ice_ddp_state state; + u16 offset, size = 0; + u32 reg = 0; + int status; + u8 flags; + + if (!buf || !len) + return -EINVAL; + + /* Does FW support new Tx topology mode ? */ + if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) { + ice_debug(hw, ICE_DBG_INIT, "FW doesn't support compatibility mode\n"); + return -EOPNOTSUPP; + } + + current_topo = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); + if (!current_topo) + return -ENOMEM; + + /* Get the current Tx topology */ + status = ice_get_set_tx_topo(hw, current_topo, ICE_AQ_MAX_BUF_LEN, NULL, + &flags, false); + + kfree(current_topo); + + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n"); + return status; + } + + /* Is default topology already applied ? */ + if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) && + hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) { + ice_debug(hw, ICE_DBG_INIT, "Default topology already applied\n"); + return -EEXIST; + } + + /* Is new topology already applied ? 
*/ + if ((flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) && + hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) { + ice_debug(hw, ICE_DBG_INIT, "New topology already applied\n"); + return -EEXIST; + } + + /* Setting topology already issued? */ + if (flags & ICE_AQC_TX_TOPO_FLAGS_ISSUED) { + ice_debug(hw, ICE_DBG_INIT, "Update Tx topology was done by another PF\n"); + /* Add a small delay before exiting */ + msleep(2000); + return -EEXIST; + } + + /* Change the topology from new to default (5 to 9) */ + if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) && + hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) { + ice_debug(hw, ICE_DBG_INIT, "Change topology from 5 to 9 layers\n"); + goto update_topo; + } + + pkg_hdr = (struct ice_pkg_hdr *)buf; + state = ice_verify_pkg(pkg_hdr, len); + if (state) { + ice_debug(hw, ICE_DBG_INIT, "Failed to verify pkg (err: %d)\n", + state); + return -EIO; + } + + /* Find runtime configuration segment */ + seg = (struct ice_run_time_cfg_seg *) + ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE_RUN_TIME_CFG, pkg_hdr); + if (!seg) { + ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment is missing\n"); + return -EIO; + } + + if (le32_to_cpu(seg->buf_table.buf_count) < ICE_MIN_S_COUNT) { + ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment count(%d) is wrong\n", + seg->buf_table.buf_count); + return -EIO; + } + + section = ice_pkg_val_buf(seg->buf_table.buf_array); + if (!section || le32_to_cpu(section->section_entry[0].type) != + ICE_SID_TX_5_LAYER_TOPO) { + ice_debug(hw, ICE_DBG_INIT, "5 layer topology section type is wrong\n"); + return -EIO; + } + + size = le16_to_cpu(section->section_entry[0].size); + offset = le16_to_cpu(section->section_entry[0].offset); + if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ) { + ice_debug(hw, ICE_DBG_INIT, "5 layer topology section size is wrong\n"); + return -EIO; + } + + /* Make sure the section fits in the buffer */ + if (offset + size > ICE_PKG_BUF_SIZE) { + ice_debug(hw, ICE_DBG_INIT, "5 layer topology buffer > 4K\n"); + return -EIO; + } + + /* Get the new topology buffer */ + new_topo = ((u8 *)section) + offset; + +update_topo: + /* Acquire global lock to make sure that set topology issued + * by one PF. + */ + status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, ICE_RES_WRITE, + ICE_GLOBAL_CFG_LOCK_TIMEOUT); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n"); + return status; + } + + /* Check if reset was triggered already. */ + reg = rd32(hw, GLGEN_RSTAT); + if (reg & GLGEN_RSTAT_DEVSTATE_M) { + /* Reset is in progress, re-init the HW again */ + ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n"); + ice_check_reset(hw); + return 0; + } + + /* Set new topology */ + status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n"); + return status; + } + + /* New topology is updated, delay 1 second before issuing the CORER */ + msleep(1000); + ice_reset(hw, ICE_RESET_CORER); + /* CORER will clear the global lock, so no explicit call + * required for release. 
+ */ + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h index ff66c2ffb1a2..622543f08b43 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.h +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h @@ -454,4 +454,6 @@ u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld); void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, u32 sect_type); +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len); + #endif diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 1aef05ea5a57..7b668083be07 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -6,6 +6,17 @@ #include "ice_common.h" +/** + * DOC: ice_sched.h + * + * This header file stores everything that is needed for broadly understood + * scheduler. It consists of defines related to layers, structures related to + * aggregator, functions declarations and others. + */ + +#define ICE_SCHED_5_LAYERS 5 +#define ICE_SCHED_9_LAYERS 9 + #define SCHED_NODE_NAME_MAX_LEN 32 #define ICE_QGRP_LAYER_OFFSET 2 diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 4049ae40c4fb..f0796a93f428 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -296,6 +296,7 @@ struct ice_hw_common_caps { bool pcie_reset_avoidance; /* Post update reset restriction */ bool reset_restrict_support; + bool tx_sched_topo_comp_mode_en; }; /* IEEE 1588 TIME_SYNC specific info */ -- cgit v1.2.3 From 927127cda11ab91ebba84f74c70b56cb4160c569 Mon Sep 17 00:00:00 2001 From: Raj Victor Date: Fri, 19 Apr 2024 04:08:51 -0400 Subject: ice: Adjust the VSI/Aggregator layers Adjust the VSI/Aggregator layers based on the number of logical layers supported by the FW. Currently the VSI and Aggregator layers are fixed based on the 9 layer scheduler tree layout. Due to performance reasons the number of layers of the scheduler tree is changing from 9 to 5. It requires a readjustment of these VSI/Aggregator layer values. Signed-off-by: Raj Victor Co-developed-by: Michal Wilczynski Signed-off-by: Michal Wilczynski Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sched.c | 37 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index a1525992d14b..ecf8f5d60292 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1128,12 +1128,11 @@ u8 ice_sched_get_vsi_layer(struct ice_hw *hw) * 5 or less sw_entry_point_layer */ /* calculate the VSI layer based on number of layers. */ - if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) { - u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET; - - if (layer > hw->sw_entry_point_layer) - return layer; - } + if (hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) + return hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET; + else if (hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) + /* qgroup and VSI layers are same */ + return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET; return hw->sw_entry_point_layer; } @@ -1150,13 +1149,10 @@ u8 ice_sched_get_agg_layer(struct ice_hw *hw) * 7 or less sw_entry_point_layer */ /* calculate the aggregator layer based on number of layers. 
*/ - if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) { - u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET; - - if (layer > hw->sw_entry_point_layer) - return layer; - } - return hw->sw_entry_point_layer; + if (hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) + return hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET; + else + return hw->sw_entry_point_layer; } /** @@ -1510,10 +1506,11 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, { struct ice_sched_node *vsi_node, *qgrp_node; struct ice_vsi_ctx *vsi_ctx; + u8 qgrp_layer, vsi_layer; u16 max_children; - u8 qgrp_layer; qgrp_layer = ice_sched_get_qgrp_layer(pi->hw); + vsi_layer = ice_sched_get_vsi_layer(pi->hw); max_children = pi->hw->max_children[qgrp_layer]; vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); @@ -1524,6 +1521,12 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, if (!vsi_node) return NULL; + /* If the queue group and VSI layer are same then queues + * are all attached directly to VSI + */ + if (qgrp_layer == vsi_layer) + return vsi_node; + /* get the first queue group node from VSI sub-tree */ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); while (qgrp_node) { @@ -3199,7 +3202,7 @@ ice_sched_add_rl_profile(struct ice_port_info *pi, u8 profile_type; int status; - if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + if (!pi || layer_num >= pi->hw->num_tx_sched_layers) return NULL; switch (rl_type) { case ICE_MIN_BW: @@ -3215,8 +3218,6 @@ ice_sched_add_rl_profile(struct ice_port_info *pi, return NULL; } - if (!pi) - return NULL; hw = pi->hw; list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], list_entry) @@ -3446,7 +3447,7 @@ ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type, struct ice_aqc_rl_profile_info *rl_prof_elem; int status = 0; - if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + if (layer_num >= pi->hw->num_tx_sched_layers) return -EINVAL; /* Check the existing list for RL profile */ list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], -- cgit v1.2.3 From cc5776fe183208115e42c044497e193e4671a2b9 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Fri, 19 Apr 2024 04:08:52 -0400 Subject: ice: Enable switching default Tx scheduler topology Introduce support for Tx scheduler topology change, based on user selection, from default 9-layer to 5-layer. Change requires NVM (version 3.20 or newer) and DDP package (OS Package 1.3.30 or newer - available for over a year in linux-firmware, since commit aed71f296637 in linux-firmware ("ice: Update package to 1.3.30.0")) https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/commit/?id=aed71f296637 Enable 5-layer topology switch in init path of the driver. To accomplish that upload of the DDP package needs to be delayed, until change in Tx topology is finished. To trigger the Tx change user selection should be changed in NVM using devlink. Then the platform should be rebooted. 
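For example (using the tx_scheduling_layers devlink parameter introduced later in this series; shown here only to illustrate the intended flow, it is not part of this patch):

  devlink dev param set pci/0000:4b:00.0 name tx_scheduling_layers value 5 cmode permanent

followed by a reboot (or PCI slot powercycle) so the driver picks up the new selection from NVM during init.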
Signed-off-by: Michal Wilczynski Co-developed-by: Mateusz Polchlopek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 108 ++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2b173638b877..6adfd7e11460 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4453,11 +4453,13 @@ static char *ice_get_opt_fw_name(struct ice_pf *pf) /** * ice_request_fw - Device initialization routine * @pf: pointer to the PF instance + * @firmware: double pointer to firmware struct + * + * Return: zero when successful, negative values otherwise. */ -static void ice_request_fw(struct ice_pf *pf) +static int ice_request_fw(struct ice_pf *pf, const struct firmware **firmware) { char *opt_fw_filename = ice_get_opt_fw_name(pf); - const struct firmware *firmware = NULL; struct device *dev = ice_pf_to_dev(pf); int err = 0; @@ -4466,29 +4468,95 @@ static void ice_request_fw(struct ice_pf *pf) * and warning messages for other errors. */ if (opt_fw_filename) { - err = firmware_request_nowarn(&firmware, opt_fw_filename, dev); - if (err) { - kfree(opt_fw_filename); - goto dflt_pkg_load; - } - - /* request for firmware was successful. Download to device */ - ice_load_pkg(firmware, pf); + err = firmware_request_nowarn(firmware, opt_fw_filename, dev); kfree(opt_fw_filename); - release_firmware(firmware); - return; + if (!err) + return err; } + err = request_firmware(firmware, ICE_DDP_PKG_FILE, dev); + if (err) + dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); + + return err; +} + +/** + * ice_init_tx_topology - performs Tx topology initialization + * @hw: pointer to the hardware structure + * @firmware: pointer to firmware structure + * + * Return: zero when init was successful, negative values otherwise. + */ +static int +ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) +{ + u8 num_tx_sched_layers = hw->num_tx_sched_layers; + struct ice_pf *pf = hw->back; + struct device *dev; + u8 *buf_copy; + int err; + + dev = ice_pf_to_dev(pf); + /* ice_cfg_tx_topo buf argument is not a constant, + * so we have to make a copy + */ + buf_copy = kmemdup(firmware->data, firmware->size, GFP_KERNEL); + + err = ice_cfg_tx_topo(hw, buf_copy, firmware->size); + if (!err) { + if (hw->num_tx_sched_layers > num_tx_sched_layers) + dev_info(dev, "Tx scheduling layers switching feature disabled\n"); + else + dev_info(dev, "Tx scheduling layers switching feature enabled\n"); + /* if there was a change in topology ice_cfg_tx_topo triggered + * a CORER and we need to re-init hw + */ + ice_deinit_hw(hw); + err = ice_init_hw(hw); -dflt_pkg_load: - err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev); + return err; + } else if (err == -EIO) { + dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + } + + return 0; +} + +/** + * ice_init_ddp_config - DDP related configuration + * @hw: pointer to the hardware structure + * @pf: pointer to pf structure + * + * This function loads DDP file from the disk, then initializes Tx + * topology. At the end DDP package is loaded on the card. + * + * Return: zero when init was successful, negative values otherwise. 
+ */ +static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + const struct firmware *firmware = NULL; + int err; + + err = ice_request_fw(pf, &firmware); if (err) { - dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); - return; + dev_err(dev, "Fail during requesting FW: %d\n", err); + return err; + } + + err = ice_init_tx_topology(hw, firmware); + if (err) { + dev_err(dev, "Fail during initialization of Tx topology: %d\n", + err); + release_firmware(firmware); + return err; } - /* request for firmware was successful. Download to device */ + /* Download firmware to device */ ice_load_pkg(firmware, pf); release_firmware(firmware); + + return 0; } /** @@ -4661,9 +4729,11 @@ int ice_init_dev(struct ice_pf *pf) ice_init_feature_support(pf); - ice_request_fw(pf); + err = ice_init_ddp_config(hw, pf); + if (err) + return err; - /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be + /* if ice_init_ddp_config fails, ICE_FLAG_ADV_FEATURES bit won't be * set in pf->state, which will cause ice_is_safe_mode to return * true */ -- cgit v1.2.3 From 109eb29172847df26dca40ac095a3e1b17bd4a44 Mon Sep 17 00:00:00 2001 From: Lukasz Czapnik Date: Fri, 19 Apr 2024 04:08:53 -0400 Subject: ice: Add tx_scheduling_layers devlink param It was observed that Tx performance was inconsistent across all queues and/or VSIs and that it was directly connected to existing 9-layer topology of the Tx scheduler. Introduce new private devlink param - tx_scheduling_layers. This parameter gives user flexibility to choose the 5-layer transmit scheduler topology which helps to smooth out the transmit performance. Allowed parameter values are 5 and 9. Example usage: Show: devlink dev param show pci/0000:4b:00.0 name tx_scheduling_layers pci/0000:4b:00.0: name tx_scheduling_layers type driver-specific values: cmode permanent value 9 Set: devlink dev param set pci/0000:4b:00.0 name tx_scheduling_layers value 5 cmode permanent devlink dev param set pci/0000:4b:00.0 name tx_scheduling_layers value 9 cmode permanent Signed-off-by: Lukasz Czapnik Reviewed-by: Przemek Kitszel Co-developed-by: Mateusz Polchlopek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 172 ++++++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 9 ++ drivers/net/ethernet/intel/ice/ice_fw_update.c | 7 +- drivers/net/ethernet/intel/ice/ice_fw_update.h | 3 + drivers/net/ethernet/intel/ice/ice_nvm.c | 7 +- drivers/net/ethernet/intel/ice/ice_nvm.h | 3 + 6 files changed, 191 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index acbace240977..d191c5709899 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -523,6 +523,156 @@ ice_devlink_reload_empr_finish(struct ice_pf *pf, return 0; } +/** + * ice_get_tx_topo_user_sel - Read user's choice from flash + * @pf: pointer to pf structure + * @layers: value read from flash will be saved here + * + * Reads user's preference for Tx Scheduler Topology Tree from PFA TLV. + * + * Return: zero when read was successful, negative values otherwise. 
+ */ +static int ice_get_tx_topo_user_sel(struct ice_pf *pf, uint8_t *layers) +{ + struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {}; + struct ice_hw *hw = &pf->hw; + int err; + + err = ice_acquire_nvm(hw, ICE_RES_READ); + if (err) + return err; + + err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0, + sizeof(usr_sel), &usr_sel, true, true, NULL); + if (err) + goto exit_release_res; + + if (usr_sel.data & ICE_AQC_NVM_TX_TOPO_USER_SEL) + *layers = ICE_SCHED_5_LAYERS; + else + *layers = ICE_SCHED_9_LAYERS; + +exit_release_res: + ice_release_nvm(hw); + + return err; +} + +/** + * ice_update_tx_topo_user_sel - Save user's preference in flash + * @pf: pointer to pf structure + * @layers: value to be saved in flash + * + * Variable "layers" defines user's preference about number of layers in Tx + * Scheduler Topology Tree. This choice should be stored in PFA TLV field + * and be picked up by driver, next time during init. + * + * Return: zero when save was successful, negative values otherwise. + */ +static int ice_update_tx_topo_user_sel(struct ice_pf *pf, int layers) +{ + struct ice_aqc_nvm_tx_topo_user_sel usr_sel = {}; + struct ice_hw *hw = &pf->hw; + int err; + + err = ice_acquire_nvm(hw, ICE_RES_WRITE); + if (err) + return err; + + err = ice_aq_read_nvm(hw, ICE_AQC_NVM_TX_TOPO_MOD_ID, 0, + sizeof(usr_sel), &usr_sel, true, true, NULL); + if (err) + goto exit_release_res; + + if (layers == ICE_SCHED_5_LAYERS) + usr_sel.data |= ICE_AQC_NVM_TX_TOPO_USER_SEL; + else + usr_sel.data &= ~ICE_AQC_NVM_TX_TOPO_USER_SEL; + + err = ice_write_one_nvm_block(pf, ICE_AQC_NVM_TX_TOPO_MOD_ID, 2, + sizeof(usr_sel.data), &usr_sel.data, + true, NULL, NULL); +exit_release_res: + ice_release_nvm(hw); + + return err; +} + +/** + * ice_devlink_tx_sched_layers_get - Get tx_scheduling_layers parameter + * @devlink: pointer to the devlink instance + * @id: the parameter ID to set + * @ctx: context to store the parameter value + * + * Return: zero on success and negative value on failure. + */ +static int ice_devlink_tx_sched_layers_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_get_tx_topo_user_sel(pf, &ctx->val.vu8); + if (err) + return err; + + return 0; +} + +/** + * ice_devlink_tx_sched_layers_set - Set tx_scheduling_layers parameter + * @devlink: pointer to the devlink instance + * @id: the parameter ID to set + * @ctx: context to get the parameter value + * @extack: netlink extended ACK structure + * + * Return: zero on success and negative value on failure. + */ +static int ice_devlink_tx_sched_layers_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + int err; + + err = ice_update_tx_topo_user_sel(pf, ctx->val.vu8); + if (err) + return err; + + NL_SET_ERR_MSG_MOD(extack, + "Tx scheduling layers have been changed on this device. You must do the PCI slot powercycle for the change to take effect."); + + return 0; +} + +/** + * ice_devlink_tx_sched_layers_validate - Validate passed tx_scheduling_layers + * parameter value + * @devlink: unused pointer to devlink instance + * @id: the parameter ID to validate + * @val: value to validate + * @extack: netlink extended ACK structure + * + * Supported values are: + * - 5 - five layers Tx Scheduler Topology Tree + * - 9 - nine layers Tx Scheduler Topology Tree + * + * Return: zero when passed parameter value is supported. Negative value on + * error. 
+ */ +static int ice_devlink_tx_sched_layers_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 != ICE_SCHED_5_LAYERS && val.vu8 != ICE_SCHED_9_LAYERS) { + NL_SET_ERR_MSG_MOD(extack, + "Wrong number of tx scheduler layers provided."); + return -EINVAL; + } + + return 0; +} + /** * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree * @pf: pf struct @@ -1235,6 +1385,11 @@ ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id, return 0; } +enum ice_param_id { + ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, +}; + static const struct devlink_param ice_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME), ice_devlink_enable_roce_get, @@ -1244,7 +1399,13 @@ static const struct devlink_param ice_devlink_params[] = { ice_devlink_enable_iw_get, ice_devlink_enable_iw_set, ice_devlink_enable_iw_validate), - + DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, + "tx_scheduling_layers", + DEVLINK_PARAM_TYPE_U8, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + ice_devlink_tx_sched_layers_get, + ice_devlink_tx_sched_layers_set, + ice_devlink_tx_sched_layers_validate), }; static void ice_devlink_free(void *devlink_ptr) @@ -1304,9 +1465,16 @@ void ice_devlink_unregister(struct ice_pf *pf) int ice_devlink_register_params(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); + struct ice_hw *hw = &pf->hw; + size_t params_size; + + params_size = ARRAY_SIZE(ice_devlink_params); + + if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) + params_size--; return devl_params_register(devlink, ice_devlink_params, - ARRAY_SIZE(ice_devlink_params)); + params_size); } void ice_devlink_unregister_params(struct ice_pf *pf) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 0487c425ae24..e76c388b9905 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1684,6 +1684,15 @@ struct ice_aqc_nvm { #define ICE_AQC_NVM_START_POINT 0 +#define ICE_AQC_NVM_TX_TOPO_MOD_ID 0x14B + +struct ice_aqc_nvm_tx_topo_user_sel { + __le16 length; + u8 data; +#define ICE_AQC_NVM_TX_TOPO_USER_SEL BIT(4) + u8 reserved; +}; + /* NVM Checksum Command (direct, 0x0706) */ struct ice_aqc_nvm_checksum { u8 flags; diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 319a2d6fe26c..f81db6c107c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -286,10 +286,9 @@ ice_send_component_table(struct pldmfw *context, struct pldmfw_component *compon * * Returns: zero on success, or a negative error code on failure. 
*/ -static int -ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, - u16 block_size, u8 *block, bool last_cmd, - u8 *reset_level, struct netlink_ext_ack *extack) +int ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, + u16 block_size, u8 *block, bool last_cmd, + u8 *reset_level, struct netlink_ext_ack *extack) { u16 completion_module, completion_retval; struct device *dev = ice_pf_to_dev(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.h b/drivers/net/ethernet/intel/ice/ice_fw_update.h index 750574885716..04b200462757 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.h +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.h @@ -9,5 +9,8 @@ int ice_devlink_flash_update(struct devlink *devlink, struct netlink_ext_ack *extack); int ice_get_pending_updates(struct ice_pf *pf, u8 *pending, struct netlink_ext_ack *extack); +int ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, + u16 block_size, u8 *block, bool last_cmd, + u8 *reset_level, struct netlink_ext_ack *extack); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index d4e05d2cb30c..84eab92dc03c 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -18,10 +18,9 @@ * * Read the NVM using the admin queue commands (0x0701) */ -static int -ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length, - void *data, bool last_command, bool read_shadow_ram, - struct ice_sq_cd *cd) +int ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram, struct ice_sq_cd *cd) { struct ice_aq_desc desc; struct ice_aqc_nvm *cmd; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index 774c2317967d..63cdc6bdac58 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -14,6 +14,9 @@ struct ice_orom_civd_info { int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access); void ice_release_nvm(struct ice_hw *hw); +int ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram, struct ice_sq_cd *cd); int ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data, bool read_shadow_ram); -- cgit v1.2.3 From 7ec56914d3ac0a84a71836296ffd35df0e88007b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 19 Apr 2024 11:04:41 +0300 Subject: net/mlx5e: Move DIM function declarations to en/dim.h Create a header specifically for DIM-related declarations. Move existing DIM-specific functionality from en.h. Future DIM-related functionality will be declared in en/dim.h in subsequent patches. Co-developed-by: Nabil S. Alramli Signed-off-by: Nabil S. 
Alramli Co-developed-by: Joe Damato Signed-off-by: Joe Damato Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240419080445.417574-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en/dim.h | 15 +++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/dim.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2acd1ebb0888..1c98199b5267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1210,8 +1210,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_rx_dim_work(struct work_struct *work); -void mlx5e_tx_dim_work(struct work_struct *work); void mlx5e_set_xdp_feature(struct net_device *netdev); netdev_features_t mlx5e_features_check(struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h new file mode 100644 index 000000000000..cd2cf647c85a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved */ + +#ifndef __MLX5_EN_DIM_H__ +#define __MLX5_EN_DIM_H__ + +#include + +/* Forward declarations */ +struct work_struct; + +void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); + +#endif /* __MLX5_EN_DIM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index ca9cfbf57d8f..df692e29ab8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,8 +30,8 @@ * SOFTWARE. 
*/ -#include #include "en.h" +#include "en/dim.h" static void mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5cca04796d74..75bf7f3d9f25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -33,6 +33,7 @@ #include #include "en.h" +#include "en/dim.h" #include "en/port.h" #include "en/params.h" #include "en/ptp.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 78ff737f0378..cf529f07faf1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -43,6 +43,7 @@ #include #include "eswitch.h" #include "en.h" +#include "en/dim.h" #include "en/txrx.h" #include "en_tc.h" #include "en_rep.h" -- cgit v1.2.3 From eca1e8a62888a31c8dcfad79457232594be40c1a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 19 Apr 2024 11:04:42 +0300 Subject: net/mlx5e: Use DIM constants for CQ period mode parameter Use core DIM CQ period mode enum values for the CQ parameter for the period mode. Translate the value to the specific mlx5 device constant for the selected period mode when creating a CQ. Avoid needing to translate mlx5 device constants to DIM constants for core DIM functionality. Co-developed-by: Nabil S. Alramli Signed-off-by: Nabil S. Alramli Co-developed-by: Joe Damato Signed-off-by: Joe Damato Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240419080445.417574-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/dim.h | 26 +++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/params.c | 34 +++++++--------------- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 16 +++------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++--------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 7 +++-- include/linux/mlx5/mlx5_ifc.h | 4 +-- 6 files changed, 53 insertions(+), 54 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h index cd2cf647c85a..6411ae4c6b94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h @@ -4,11 +4,37 @@ #ifndef __MLX5_EN_DIM_H__ #define __MLX5_EN_DIM_H__ +#include #include +#include /* Forward declarations */ struct work_struct; +/* convert a boolean value for cqe mode to appropriate dim constant + * true : DIM_CQ_PERIOD_MODE_START_FROM_CQE + * false : DIM_CQ_PERIOD_MODE_START_FROM_EQE + */ +static inline int mlx5e_dim_cq_period_mode(bool start_from_cqe) +{ + return start_from_cqe ? 
DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +static inline enum mlx5_cq_period_mode +mlx5e_cq_period_mode(enum dim_cq_period_mode cq_period_mode) +{ + switch (cq_period_mode) { + case DIM_CQ_PERIOD_MODE_START_FROM_EQE: + return MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + case DIM_CQ_PERIOD_MODE_START_FROM_CQE: + return MLX5_CQ_PERIOD_MODE_START_FROM_CQE; + default: + WARN_ON_ONCE(true); + return MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + } +} + void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index a3f31d9d527e..0424628405e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +#include #include #include @@ -520,7 +521,7 @@ static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; return moder; @@ -533,39 +534,26 @@ static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; return moder; } -static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) -{ - return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 
- DIM_CQ_PERIOD_MODE_START_FROM_CQE : - DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) { - if (params->tx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); - } else { + if (params->tx_dim_enabled) + params->tx_cq_moderation = net_dim_get_def_tx_moderation(cq_period_mode); + else params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); - } } void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) { - if (params->rx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); - } else { + if (params->rx_dim_enabled) + params->rx_cq_moderation = net_dim_get_def_rx_moderation(cq_period_mode); + else params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); - } } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -573,7 +561,7 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) mlx5e_reset_tx_moderation(params, cq_period_mode); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, params->tx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + DIM_CQ_PERIOD_MODE_START_FROM_CQE); } void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -581,7 +569,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) mlx5e_reset_rx_moderation(params, cq_period_mode); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, params->rx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + DIM_CQ_PERIOD_MODE_START_FROM_CQE); } bool slow_pci_heuristic(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 75bf7f3d9f25..c07785e675bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include "en.h" @@ -627,15 +628,6 @@ mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal } } -/* convert a boolean value of cq_mode to mlx5 period mode - * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE - * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE - */ -static int cqe_mode_to_period_mode(bool val) -{ - return val ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; -} - int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, @@ -688,13 +680,13 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; - cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx); + cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_rx); if (cq_period_mode != rx_moder->cq_period_mode) { mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); reset_rx = true; } - cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx); + cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_tx); if (cq_period_mode != tx_moder->cq_period_mode) { mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); reset_tx = true; @@ -1915,7 +1907,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) return -EOPNOTSUPP; - cq_period_mode = cqe_mode_to_period_mode(enable); + cq_period_mode = mlx5e_dim_cq_period_mode(enable); current_cq_period_mode = is_rx_cq ? priv->channels.params.rx_cq_moderation.cq_period_mode : diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cf529f07faf1..12d1f4548343 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -962,15 +963,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, } INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work); - - switch (params->rx_cq_moderation.cq_period_mode) { - case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; - break; - case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: - default: - rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - } + rq->dim.mode = params->rx_cq_moderation.cq_period_mode; return 0; @@ -2090,7 +2083,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); + MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - @@ -5059,13 +5052,12 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ - rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + rx_cq_period_mode = + mlx5e_dim_cq_period_mode(MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)); params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); - mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + mlx5e_set_tx_cq_mode_params(params, DIM_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6acecf2e7cf6..309771300581 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -40,6 +41,7 @@ #include "eswitch.h" #include "en.h" +#include "en/dim.h" #include "en_rep.h" #include "en/params.h" #include "en/txrx.h" @@ -836,9 +838,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_params *params; - u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + u8 cq_period_mode = + mlx5e_dim_cq_period_mode(MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)); params = &priv->channels.params; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35ffc9b9f241..8c7ddb22bf20 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4385,10 +4385,10 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; -enum { +enum mlx5_cq_period_mode { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, - MLX5_CQ_PERIOD_NUM_MODES + MLX5_CQ_PERIOD_NUM_MODES, }; struct mlx5_ifc_cqc_bits { -- cgit v1.2.3 From a5e89a3f353b931e917b77e7cf6719c8e8d64c54 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 19 Apr 2024 11:04:43 +0300 Subject: net/mlx5e: Dynamically allocate DIM structure for SQs/RQs Make it possible for the DIM structure to be torn down while an SQ or RQ is still active. Changing the CQ period mode is an example where the previous sampling done with the DIM structure would need to be invalidated. Co-developed-by: Nabil S. Alramli Signed-off-by: Nabil S. 
Alramli Co-developed-by: Joe Damato Signed-off-by: Joe Damato Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240419080445.417574-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 31 ++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 +-- 4 files changed, 31 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1c98199b5267..c8c0a0614e7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -430,7 +430,7 @@ struct mlx5e_txqsq { u16 cc; u16 skb_fifo_cc; u32 dma_fifo_cc; - struct dim dim; /* Adaptive Moderation */ + struct dim *dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -722,7 +722,7 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct dim dim; /* Dynamic Interrupt Moderation */ + struct dim *dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog __rcu *xdp_prog; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index df692e29ab8a..106a1f70dd9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -44,7 +44,7 @@ mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, void mlx5e_rx_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); - struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); + struct mlx5e_rq *rq = dim->priv; struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); @@ -54,7 +54,7 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); - struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct mlx5e_txqsq *sq = dim->priv; struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 12d1f4548343..8b4ecae0fd9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -962,11 +962,20 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, } } - INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work); - rq->dim.mode = params->rx_cq_moderation.cq_period_mode; + rq->dim = kvzalloc_node(sizeof(*rq->dim), GFP_KERNEL, node); + if (!rq->dim) { + err = -ENOMEM; + goto err_unreg_xdp_rxq_info; + } + + rq->dim->priv = rq; + INIT_WORK(&rq->dim->work, mlx5e_rx_dim_work); + rq->dim->mode = params->rx_cq_moderation.cq_period_mode; return 0; +err_unreg_xdp_rxq_info: + xdp_rxq_info_unreg(&rq->xdp_rxq); err_destroy_page_pool: page_pool_destroy(rq->page_pool); err_free_by_rq_type: @@ -1014,6 +1023,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5e_free_wqe_alloc_info(rq); } + kvfree(rq->dim); xdp_rxq_info_unreg(&rq->xdp_rxq); page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); @@ -1341,7 +1351,7 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { - cancel_work_sync(&rq->dim.work); + cancel_work_sync(&rq->dim->work); 
cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1616,12 +1626,20 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; + sq->dim = kvzalloc_node(sizeof(*sq->dim), GFP_KERNEL, cpu_to_node(c->cpu)); + if (!sq->dim) { + err = -ENOMEM; + goto err_free_txqsq_db; + } - INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); - sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + sq->dim->priv = sq; + INIT_WORK(&sq->dim->work, mlx5e_tx_dim_work); + sq->dim->mode = params->tx_cq_moderation.cq_period_mode; return 0; +err_free_txqsq_db: + mlx5e_free_txqsq_db(sq); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -1630,6 +1648,7 @@ err_sq_wq_destroy: void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) { + kvfree(sq->dim); mlx5e_free_txqsq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); } @@ -1841,7 +1860,7 @@ void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; - cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->dim->work); cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a7d9b7cb4297..5873fde65c2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -55,7 +55,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) return; dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); - net_dim(&sq->dim, dim_sample); + net_dim(sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) @@ -67,7 +67,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) return; dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); - net_dim(&rq->dim, dim_sample); + net_dim(rq->dim, dim_sample); } void mlx5e_trigger_irq(struct mlx5e_icosq *sq) -- cgit v1.2.3 From 445a25f6e1a2f6a132b06af6ede4f3c9b5f9af68 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 19 Apr 2024 11:04:44 +0300 Subject: net/mlx5e: Support updating coalescing configuration without resetting channels When CQE mode or DIM state is changed, gracefully reconfigure channels to handle new configuration. Previously, would create new channels that would reflect the changes rather than update the original channels. Co-developed-by: Nabil S. Alramli Signed-off-by: Nabil S. 
Alramli Co-developed-by: Joe Damato Signed-off-by: Joe Damato Signed-off-by: Rahul Rameshbabu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240419080445.417574-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 ++ .../net/ethernet/mellanox/mlx5/core/en/channels.c | 83 ++++++++ .../net/ethernet/mellanox/mlx5/core/en/channels.h | 4 + drivers/net/ethernet/mellanox/mlx5/core/en/dim.h | 4 + .../net/ethernet/mellanox/mlx5/core/en/params.c | 58 ------ .../net/ethernet/mellanox/mlx5/core/en/params.h | 5 - drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 89 ++++++++- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 141 ++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 210 +++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 +- include/linux/mlx5/cq.h | 7 +- include/linux/mlx5/mlx5_ifc.h | 3 +- 12 files changed, 460 insertions(+), 169 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c8c0a0614e7b..eb09778327cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -320,6 +320,8 @@ struct mlx5e_params { bool scatter_fcs_en; bool rx_dim_enabled; bool tx_dim_enabled; + bool rx_moder_use_cqe_mode; + bool tx_moder_use_cqe_mode; u32 pflags; struct bpf_prog *xdp_prog; struct mlx5e_xsk *xsk; @@ -797,6 +799,10 @@ struct mlx5e_channel { int cpu; /* Sync between icosq recovery and XSK enable/disable. */ struct mutex icosq_recovery_lock; + + /* coalescing configuration */ + struct dim_cq_moder rx_cq_moder; + struct dim_cq_moder tx_cq_moder; }; struct mlx5e_ptp; @@ -1040,6 +1046,11 @@ void mlx5e_close_rq(struct mlx5e_rq *rq); int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter); void mlx5e_destroy_rq(struct mlx5e_rq *rq); +bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, + bool dim_enabled); +bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, + bool dim_enabled, bool keep_dim_state); + struct mlx5e_sq_param; int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, @@ -1060,6 +1071,10 @@ int mlx5e_open_cq(struct mlx5_core_dev *mdev, struct dim_cq_moder moder, struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, struct mlx5e_cq *cq); void mlx5e_close_cq(struct mlx5e_cq *cq); +int mlx5e_modify_cq_period_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u8 cq_period_mode); +int mlx5e_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u16 cq_period, u16 cq_max_count, u8 cq_period_mode); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -1118,6 +1133,11 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); +bool mlx5e_reset_tx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, + bool dim_enabled); +bool mlx5e_reset_tx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, + bool dim_enabled, bool keep_dim_state); + static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { return MLX5_CAP_ETH(mdev, swp) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c 
index 874a1016623c..66e719e88503 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -3,6 +3,7 @@ #include "channels.h" #include "en.h" +#include "en/dim.h" #include "en/ptp.h" unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) @@ -55,3 +56,85 @@ bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) *rqn = c->rq.rqn; return true; } + +int mlx5e_channels_rx_change_dim(struct mlx5e_channels *chs, bool enable) +{ + int i; + + for (i = 0; i < chs->num; i++) { + int err = mlx5e_dim_rx_change(&chs->c[i]->rq, enable); + + if (err) + return err; + } + + return 0; +} + +int mlx5e_channels_tx_change_dim(struct mlx5e_channels *chs, bool enable) +{ + int i, tc; + + for (i = 0; i < chs->num; i++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&chs->params); tc++) { + int err = mlx5e_dim_tx_change(&chs->c[i]->sq[tc], enable); + + if (err) + return err; + } + } + + return 0; +} + +int mlx5e_channels_rx_toggle_dim(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) { + /* If dim is enabled for the channel, reset the dim state so the + * collected statistics will be reset. This is useful for + * supporting legacy interfaces that allow things like changing + * the CQ period mode for all channels without disturbing + * individual channel configurations. + */ + if (chs->c[i]->rq.dim) { + int err; + + mlx5e_dim_rx_change(&chs->c[i]->rq, false); + err = mlx5e_dim_rx_change(&chs->c[i]->rq, true); + if (err) + return err; + } + } + + return 0; +} + +int mlx5e_channels_tx_toggle_dim(struct mlx5e_channels *chs) +{ + int i, tc; + + for (i = 0; i < chs->num; i++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&chs->params); tc++) { + int err; + + /* If dim is enabled for the channel, reset the dim + * state so the collected statistics will be reset. This + * is useful for supporting legacy interfaces that allow + * things like changing the CQ period mode for all + * channels without disturbing individual channel + * configurations. 
+ */ + if (!chs->c[i]->sq[tc].dim) + continue; + + mlx5e_dim_tx_change(&chs->c[i]->sq[tc], false); + err = mlx5e_dim_tx_change(&chs->c[i]->sq[tc], true); + if (err) + return err; + } + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h index 6715aa9383b9..eda80f8c6c02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -15,5 +15,9 @@ void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn, u32 *vhca_id); bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); +int mlx5e_channels_rx_change_dim(struct mlx5e_channels *chs, bool enabled); +int mlx5e_channels_tx_change_dim(struct mlx5e_channels *chs, bool enabled); +int mlx5e_channels_rx_toggle_dim(struct mlx5e_channels *chs); +int mlx5e_channels_tx_toggle_dim(struct mlx5e_channels *chs); #endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h index 6411ae4c6b94..110e2c6b7e51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dim.h @@ -9,6 +9,8 @@ #include /* Forward declarations */ +struct mlx5e_rq; +struct mlx5e_txqsq; struct work_struct; /* convert a boolean value for cqe mode to appropriate dim constant @@ -37,5 +39,7 @@ mlx5e_cq_period_mode(enum dim_cq_period_mode cq_period_mode) void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); +int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enabled); +int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enabled); #endif /* __MLX5_EN_DIM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 0424628405e0..ec819dfc98be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -514,64 +514,6 @@ int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *param return 0; } -static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder = {}; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder = {}; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->tx_dim_enabled) - params->tx_cq_moderation = net_dim_get_def_tx_moderation(cq_period_mode); - else - params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); -} - -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->rx_dim_enabled) - params->rx_cq_moderation = net_dim_get_def_rx_moderation(cq_period_mode); - 
else - params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); -} - -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_tx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, - params->tx_cq_moderation.cq_period_mode == - DIM_CQ_PERIOD_MODE_START_FROM_CQE); -} - -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_rx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_moderation.cq_period_mode == - DIM_CQ_PERIOD_MODE_START_FROM_CQE); -} - bool slow_pci_heuristic(struct mlx5_core_dev *mdev) { u32 link_speed = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 9a781f18b57f..749b2ec0436e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -77,11 +77,6 @@ u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, /* Parameter calculations */ -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - bool slow_pci_heuristic(struct mlx5_core_dev *mdev); int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 106a1f70dd9a..298bb74ec5e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -37,7 +37,8 @@ static void mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { - mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + mlx5e_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts, + mlx5e_cq_period_mode(moder.cq_period_mode)); dim->state = DIM_START_MEASURE; } @@ -60,3 +61,89 @@ void mlx5e_tx_dim_work(struct work_struct *work) mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); } + +static struct dim *mlx5e_dim_enable(struct mlx5_core_dev *mdev, + void (*work_fun)(struct work_struct *), int cpu, + u8 cq_period_mode, struct mlx5_core_cq *mcq, + void *queue) +{ + struct dim *dim; + int err; + + dim = kvzalloc_node(sizeof(*dim), GFP_KERNEL, cpu_to_node(cpu)); + if (!dim) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&dim->work, work_fun); + + dim->mode = cq_period_mode; + dim->priv = queue; + + err = mlx5e_modify_cq_period_mode(mdev, mcq, dim->mode); + if (err) { + kvfree(dim); + return ERR_PTR(err); + } + + return dim; +} + +static void mlx5e_dim_disable(struct dim *dim) +{ + cancel_work_sync(&dim->work); + kvfree(dim); +} + +int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) +{ + if (enable == !!rq->dim) + return 0; + + if (enable) { + struct mlx5e_channel *c = rq->channel; + struct dim *dim; + + dim = mlx5e_dim_enable(rq->mdev, mlx5e_rx_dim_work, c->cpu, + c->rx_cq_moder.cq_period_mode, &rq->cq.mcq, rq); + if (IS_ERR(dim)) + return PTR_ERR(dim); + + rq->dim = dim; + + __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); + } else { + __clear_bit(MLX5E_RQ_STATE_DIM, 
&rq->state); + + mlx5e_dim_disable(rq->dim); + rq->dim = NULL; + } + + return 0; +} + +int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) +{ + if (enable == !!sq->dim) + return 0; + + if (enable) { + struct mlx5e_channel *c = sq->channel; + struct dim *dim; + + dim = mlx5e_dim_enable(sq->mdev, mlx5e_tx_dim_work, c->cpu, + c->tx_cq_moder.cq_period_mode, &sq->cq.mcq, sq); + if (IS_ERR(dim)) + return PTR_ERR(dim); + + sq->dim = dim; + + __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); + } else { + __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); + + mlx5e_dim_disable(sq->dim); + sq->dim = NULL; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c07785e675bc..c968874569cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -34,6 +34,7 @@ #include #include "en.h" +#include "en/channels.h" #include "en/dim.h" #include "en/port.h" #include "en/params.h" @@ -567,16 +568,13 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, coal->rx_coalesce_usecs = rx_moder->usec; coal->rx_max_coalesced_frames = rx_moder->pkts; coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + kernel_coal->use_cqe_mode_rx = priv->channels.params.rx_moder_use_cqe_mode; tx_moder = &priv->channels.params.tx_cq_moderation; coal->tx_coalesce_usecs = tx_moder->usec; coal->tx_max_coalesced_frames = tx_moder->pkts; coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; - - kernel_coal->use_cqe_mode_rx = - MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); - kernel_coal->use_cqe_mode_tx = - MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER); + kernel_coal->use_cqe_mode_tx = priv->channels.params.tx_moder_use_cqe_mode; return 0; } @@ -595,7 +593,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, #define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT static void -mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct dim_cq_moder *moder) { int tc; int i; @@ -603,28 +601,34 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal for (i = 0; i < priv->channels.num; ++i) { struct mlx5e_channel *c = priv->channels.c[i]; struct mlx5_core_dev *mdev = c->mdev; + enum mlx5_cq_period_mode mode; + + mode = mlx5e_cq_period_mode(moder->cq_period_mode); + c->tx_cq_moder = *moder; for (tc = 0; tc < c->num_tc; tc++) { - mlx5_core_modify_cq_moderation(mdev, - &c->sq[tc].cq.mcq, - coal->tx_coalesce_usecs, - coal->tx_max_coalesced_frames); + mlx5e_modify_cq_moderation(mdev, &c->sq[tc].cq.mcq, + moder->usec, moder->pkts, + mode); } } } static void -mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct dim_cq_moder *moder) { int i; for (i = 0; i < priv->channels.num; ++i) { struct mlx5e_channel *c = priv->channels.c[i]; struct mlx5_core_dev *mdev = c->mdev; + enum mlx5_cq_period_mode mode; + + mode = mlx5e_cq_period_mode(moder->cq_period_mode); + c->rx_cq_moder = *moder; - mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, - coal->rx_coalesce_usecs, - coal->rx_max_coalesced_frames); + mlx5e_modify_cq_moderation(mdev, &c->rq.cq.mcq, moder->usec, moder->pkts, + mode); } } @@ -635,13 +639,14 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, { struct 
dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; + bool rx_dim_enabled, tx_dim_enabled; struct mlx5e_params new_params; bool reset_rx, reset_tx; - bool reset = true; u8 cq_period_mode; int err = 0; - if (!MLX5_CAP_GEN(mdev, cq_moderation)) + if (!MLX5_CAP_GEN(mdev, cq_moderation) || + !MLX5_CAP_GEN(mdev, cq_period_mode_modify)) return -EOPNOTSUPP; if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || @@ -664,60 +669,70 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + mutex_lock(&priv->state_lock); new_params = priv->channels.params; - rx_moder = &new_params.rx_cq_moderation; - rx_moder->usec = coal->rx_coalesce_usecs; - rx_moder->pkts = coal->rx_max_coalesced_frames; - new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_rx); + reset_rx = mlx5e_reset_rx_channels_moderation(&priv->channels, cq_period_mode, + rx_dim_enabled, false); + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER, cq_period_mode); - tx_moder = &new_params.tx_cq_moderation; - tx_moder->usec = coal->tx_coalesce_usecs; - tx_moder->pkts = coal->tx_max_coalesced_frames; - new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_tx); + reset_tx = mlx5e_reset_tx_channels_moderation(&priv->channels, cq_period_mode, + tx_dim_enabled, false); + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_CQE_BASED_MODER, cq_period_mode); - reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; - reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; + reset_rx |= rx_dim_enabled != new_params.rx_dim_enabled; + reset_tx |= tx_dim_enabled != new_params.tx_dim_enabled; - cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_rx); - if (cq_period_mode != rx_moder->cq_period_mode) { - mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); - reset_rx = true; - } + /* Solely used for global ethtool get coalesce */ + rx_moder = &new_params.rx_cq_moderation; + new_params.rx_dim_enabled = rx_dim_enabled; + new_params.rx_moder_use_cqe_mode = kernel_coal->use_cqe_mode_rx; - cq_period_mode = mlx5e_dim_cq_period_mode(kernel_coal->use_cqe_mode_tx); - if (cq_period_mode != tx_moder->cq_period_mode) { - mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); - reset_tx = true; - } + tx_moder = &new_params.tx_cq_moderation; + new_params.tx_dim_enabled = tx_dim_enabled; + new_params.tx_moder_use_cqe_mode = kernel_coal->use_cqe_mode_tx; if (reset_rx) { - u8 mode = MLX5E_GET_PFLAG(&new_params, - MLX5E_PFLAG_RX_CQE_BASED_MODER); + mlx5e_channels_rx_change_dim(&priv->channels, false); + mlx5e_reset_rx_moderation(rx_moder, new_params.rx_moder_use_cqe_mode, + rx_dim_enabled); - mlx5e_reset_rx_moderation(&new_params, mode); + mlx5e_set_priv_channels_rx_coalesce(priv, rx_moder); + } else if (!rx_dim_enabled) { + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + + mlx5e_set_priv_channels_rx_coalesce(priv, rx_moder); } + if (reset_tx) { - u8 mode = MLX5E_GET_PFLAG(&new_params, - MLX5E_PFLAG_TX_CQE_BASED_MODER); + mlx5e_channels_tx_change_dim(&priv->channels, false); + mlx5e_reset_tx_moderation(tx_moder, new_params.tx_moder_use_cqe_mode, + tx_dim_enabled); - mlx5e_reset_tx_moderation(&new_params, mode); - } + 
mlx5e_set_priv_channels_tx_coalesce(priv, tx_moder); + } else if (!tx_dim_enabled) { + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; - /* If DIM state hasn't changed, it's possible to modify interrupt - * moderation parameters on the fly, even if the channels are open. - */ - if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) { - if (!coal->use_adaptive_rx_coalesce) - mlx5e_set_priv_channels_rx_coalesce(priv, coal); - if (!coal->use_adaptive_tx_coalesce) - mlx5e_set_priv_channels_tx_coalesce(priv, coal); - reset = false; + mlx5e_set_priv_channels_tx_coalesce(priv, tx_moder); } - err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + /* DIM enable/disable Rx and Tx channels */ + err = mlx5e_channels_rx_change_dim(&priv->channels, rx_dim_enabled); + if (err) + goto state_unlock; + err = mlx5e_channels_tx_change_dim(&priv->channels, tx_dim_enabled); + if (err) + goto state_unlock; + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, false); +state_unlock: mutex_unlock(&priv->state_lock); return err; } @@ -1917,12 +1932,22 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; new_params = priv->channels.params; - if (is_rx_cq) - mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); - else - mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + if (is_rx_cq) { + mlx5e_reset_rx_channels_moderation(&priv->channels, cq_period_mode, + false, true); + mlx5e_channels_rx_toggle_dim(&priv->channels); + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + cq_period_mode); + } else { + mlx5e_reset_tx_channels_moderation(&priv->channels, cq_period_mode, + false, true); + mlx5e_channels_tx_toggle_dim(&priv->channels); + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + cq_period_mode); + } - return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + /* Update pflags of existing channels without resetting them */ + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, false); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8b4ecae0fd9f..3bd0695845c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -962,20 +962,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, } } - rq->dim = kvzalloc_node(sizeof(*rq->dim), GFP_KERNEL, node); - if (!rq->dim) { - err = -ENOMEM; - goto err_unreg_xdp_rxq_info; - } - - rq->dim->priv = rq; - INIT_WORK(&rq->dim->work, mlx5e_rx_dim_work); - rq->dim->mode = params->rx_cq_moderation.cq_period_mode; - return 0; -err_unreg_xdp_rxq_info: - xdp_rxq_info_unreg(&rq->xdp_rxq); err_destroy_page_pool: page_pool_destroy(rq->page_pool); err_free_by_rq_type: @@ -1304,8 +1292,21 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); - if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); + if (rq->channel && !params->rx_dim_enabled) { + rq->channel->rx_cq_moder = params->rx_cq_moderation; + } else if (rq->channel) { + u8 cq_period_mode; + + cq_period_mode = params->rx_moder_use_cqe_mode ? 
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; + mlx5e_reset_rx_moderation(&rq->channel->rx_cq_moder, cq_period_mode, + params->rx_dim_enabled); + + err = mlx5e_dim_rx_change(rq, params->rx_dim_enabled); + if (err) + goto err_destroy_rq; + } /* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render @@ -1351,7 +1352,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { - cancel_work_sync(&rq->dim->work); + if (rq->dim) + cancel_work_sync(&rq->dim->work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1626,20 +1628,9 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; - sq->dim = kvzalloc_node(sizeof(*sq->dim), GFP_KERNEL, cpu_to_node(c->cpu)); - if (!sq->dim) { - err = -ENOMEM; - goto err_free_txqsq_db; - } - - sq->dim->priv = sq; - INIT_WORK(&sq->dim->work, mlx5e_tx_dim_work); - sq->dim->mode = params->tx_cq_moderation.cq_period_mode; return 0; -err_free_txqsq_db: - mlx5e_free_txqsq_db(sq); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -1804,11 +1795,27 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, if (tx_rate) mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); - if (params->tx_dim_enabled) - sq->state |= BIT(MLX5E_SQ_STATE_DIM); + if (sq->channel && !params->tx_dim_enabled) { + sq->channel->tx_cq_moder = params->tx_cq_moderation; + } else if (sq->channel) { + u8 cq_period_mode; + + cq_period_mode = params->tx_moder_use_cqe_mode ? + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; + mlx5e_reset_tx_moderation(&sq->channel->tx_cq_moder, + cq_period_mode, + params->tx_dim_enabled); + + err = mlx5e_dim_tx_change(sq, params->tx_dim_enabled); + if (err) + goto err_destroy_sq; + } return 0; +err_destroy_sq: + mlx5e_destroy_sq(c->mdev, sq->sqn); err_free_txqsq: mlx5e_free_txqsq(sq); @@ -1860,7 +1867,8 @@ void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; - cancel_work_sync(&sq->dim->work); + if (sq->dim) + cancel_work_sync(&sq->dim->work); cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { @@ -1879,6 +1887,49 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + return (struct dim_cq_moder) { + .cq_period_mode = cq_period_mode, + .pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS, + .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
+ MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE : + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC, + }; +} + +bool mlx5e_reset_tx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, + bool dim_enabled) +{ + bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; + + if (dim_enabled) + *cq_moder = net_dim_get_def_tx_moderation(cq_period_mode); + else + *cq_moder = mlx5e_get_def_tx_moderation(cq_period_mode); + + return reset_needed; +} + +bool mlx5e_reset_tx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, + bool dim_enabled, bool keep_dim_state) +{ + bool reset = false; + int i, tc; + + for (i = 0; i < chs->num; i++) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&chs->params); tc++) { + if (keep_dim_state) + dim_enabled = !!chs->c[i]->sq[tc].dim; + + reset |= mlx5e_reset_tx_moderation(&chs->c[i]->tx_cq_moder, + cq_period_mode, dim_enabled); + } + } + + return reset; +} + static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, work_func_t recover_work_func) @@ -2102,7 +2153,8 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); + MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - @@ -2140,8 +2192,10 @@ int mlx5e_open_cq(struct mlx5_core_dev *mdev, struct dim_cq_moder moder, if (err) goto err_free_cq; - if (MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); + if (MLX5_CAP_GEN(mdev, cq_moderation) && + MLX5_CAP_GEN(mdev, cq_period_mode_modify)) + mlx5e_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts, + mlx5e_cq_period_mode(moder.cq_period_mode)); return 0; err_free_cq: @@ -2156,6 +2210,40 @@ void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } +int mlx5e_modify_cq_period_mode(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u8 cq_period_mode) +{ + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; + void *cqc; + + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(cq_period_mode)); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD_MODE); + + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); +} + +int mlx5e_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u16 cq_period, u16 cq_max_count, u8 cq_period_mode) +{ + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; + void *cqc; + + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(cqc, cqc, cq_period_mode, cq_period_mode); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT | MLX5_CQ_MODIFY_PERIOD_MODE); + + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); +} + static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_create_cq_param *ccp, @@ -3973,6 +4061,47 @@ static int 
set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + return (struct dim_cq_moder) { + .cq_period_mode = cq_period_mode, + .pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS, + .usec = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC, + }; +} + +bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, + bool dim_enabled) +{ + bool reset_needed = cq_moder->cq_period_mode != cq_period_mode; + + if (dim_enabled) + *cq_moder = net_dim_get_def_rx_moderation(cq_period_mode); + else + *cq_moder = mlx5e_get_def_rx_moderation(cq_period_mode); + + return reset_needed; +} + +bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, + bool dim_enabled, bool keep_dim_state) +{ + bool reset = false; + int i; + + for (i = 0; i < chs->num; i++) { + if (keep_dim_state) + dim_enabled = !!chs->c[i]->rq.dim; + + reset |= mlx5e_reset_rx_moderation(&chs->c[i]->rx_cq_moder, + cq_period_mode, dim_enabled); + } + + return reset; +} + static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) { u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; @@ -5037,7 +5166,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 { struct mlx5e_params *params = &priv->channels.params; struct mlx5_core_dev *mdev = priv->mdev; - u8 rx_cq_period_mode; params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; @@ -5071,12 +5199,16 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ - rx_cq_period_mode = - mlx5e_dim_cq_period_mode(MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)); - params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); - mlx5e_set_tx_cq_mode_params(params, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && + MLX5_CAP_GEN(mdev, cq_period_mode_modify); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation) && + MLX5_CAP_GEN(mdev, cq_period_mode_modify); + params->rx_moder_use_cqe_mode = !!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe); + params->tx_moder_use_cqe_mode = false; + mlx5e_reset_rx_moderation(¶ms->rx_cq_moderation, params->rx_moder_use_cqe_mode, + params->rx_dim_enabled); + mlx5e_reset_tx_moderation(¶ms->tx_cq_moderation, params->tx_moder_use_cqe_mode, + params->tx_dim_enabled); /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 309771300581..6477b91ff512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -838,9 +838,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_params *params; - u8 cq_period_mode = - mlx5e_dim_cq_period_mode(MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)); - params = &priv->channels.params; params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; @@ -868,7 +865,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* CQ moderation params */ params->rx_dim_enabled = 
MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + params->rx_moder_use_cqe_mode = !!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe); params->mqprio.num_tc = 1; if (rep->vport != MLX5_VPORT_UPLINK) diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index cb15308b5cb0..991526039ccb 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -95,9 +95,10 @@ enum { }; enum { - MLX5_CQ_MODIFY_PERIOD = 1 << 0, - MLX5_CQ_MODIFY_COUNT = 1 << 1, - MLX5_CQ_MODIFY_OVERRUN = 1 << 2, + MLX5_CQ_MODIFY_PERIOD = BIT(0), + MLX5_CQ_MODIFY_COUNT = BIT(1), + MLX5_CQ_MODIFY_OVERRUN = BIT(2), + MLX5_CQ_MODIFY_PERIOD_MODE = BIT(4), }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8c7ddb22bf20..f468763478ae 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1686,7 +1686,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_223[0x3]; + u8 cq_period_mode_modify[0x1]; + u8 reserved_at_224[0x2]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; -- cgit v1.2.3 From 651ebaad6e3cf18810a755e6ae735253b1032929 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 19 Apr 2024 11:04:45 +0300 Subject: net/mlx5e: Implement ethtool callbacks for supporting per-queue coalescing Use mlx5 on-the-fly coalescing configuration support to enable individual channel configuration. Co-developed-by: Nabil S. Alramli Signed-off-by: Nabil S. Alramli Co-developed-by: Joe Damato Signed-off-by: Joe Damato Signed-off-by: Rahul Rameshbabu Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240419080445.417574-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 146 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 + 3 files changed, 152 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index eb09778327cc..f8bd9dbf59cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1199,6 +1199,10 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack); +int mlx5e_get_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal); +int mlx5e_set_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c968874569cc..1eb3a712930b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -589,6 +589,68 @@ static int mlx5e_get_coalesce(struct net_device *netdev, return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } +static int mlx5e_ethtool_get_per_queue_coalesce(struct mlx5e_priv *priv, u32 queue, + struct ethtool_coalesce *coal) +{ + struct dim_cq_moder cur_moder; + struct mlx5e_channels *chs; + struct mlx5e_channel *c; + + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + return -EOPNOTSUPP; + 
+ mutex_lock(&priv->state_lock); + + chs = &priv->channels; + if (chs->num <= queue) { + mutex_unlock(&priv->state_lock); + return -EINVAL; + } + + c = chs->c[queue]; + + coal->use_adaptive_rx_coalesce = !!c->rq.dim; + if (coal->use_adaptive_rx_coalesce) { + cur_moder = net_dim_get_rx_moderation(c->rq.dim->mode, + c->rq.dim->profile_ix); + + coal->rx_coalesce_usecs = cur_moder.usec; + coal->rx_max_coalesced_frames = cur_moder.pkts; + } else { + coal->rx_coalesce_usecs = c->rx_cq_moder.usec; + coal->rx_max_coalesced_frames = c->rx_cq_moder.pkts; + } + + coal->use_adaptive_tx_coalesce = !!c->sq[0].dim; + if (coal->use_adaptive_tx_coalesce) { + /* NOTE: Will only display DIM coalesce profile information of + * first channel. The current interface cannot display this + * information for all tc. + */ + cur_moder = net_dim_get_tx_moderation(c->sq[0].dim->mode, + c->sq[0].dim->profile_ix); + + coal->tx_coalesce_usecs = cur_moder.usec; + coal->tx_max_coalesced_frames = cur_moder.pkts; + + } else { + coal->tx_coalesce_usecs = c->tx_cq_moder.usec; + coal->tx_max_coalesced_frames = c->tx_cq_moder.pkts; + } + + mutex_unlock(&priv->state_lock); + + return 0; +} + +int mlx5e_get_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_per_queue_coalesce(priv, queue, coal); +} + #define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD #define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT @@ -747,6 +809,88 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } +static int mlx5e_ethtool_set_per_queue_coalesce(struct mlx5e_priv *priv, u32 queue, + struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_dim_enabled, tx_dim_enabled; + struct mlx5e_channels *chs; + struct mlx5e_channel *c; + int err = 0; + int tc; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) { + netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n", + __func__, MLX5E_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES || + coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) { + netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n", + __func__, MLX5E_MAX_COAL_FRAMES); + return -ERANGE; + } + + rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + + mutex_lock(&priv->state_lock); + + chs = &priv->channels; + if (chs->num <= queue) { + mutex_unlock(&priv->state_lock); + return -EINVAL; + } + + c = chs->c[queue]; + + err = mlx5e_dim_rx_change(&c->rq, rx_dim_enabled); + if (err) + goto state_unlock; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_dim_tx_change(&c->sq[tc], tx_dim_enabled); + if (err) + goto state_unlock; + } + + if (!rx_dim_enabled) { + c->rx_cq_moder.usec = coal->rx_coalesce_usecs; + c->rx_cq_moder.pkts = coal->rx_max_coalesced_frames; + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } + + if (!tx_dim_enabled) { + c->tx_cq_moder.usec = coal->tx_coalesce_usecs; + c->tx_cq_moder.pkts = coal->tx_max_coalesced_frames; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5_core_modify_cq_moderation(mdev, &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + +state_unlock: 
+ mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_set_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_per_queue_coalesce(priv, queue, coal); +} + static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, unsigned long *supported_modes, u32 eth_proto_cap) @@ -2472,6 +2616,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_channels = mlx5e_set_channels, .get_coalesce = mlx5e_get_coalesce, .set_coalesce = mlx5e_set_coalesce, + .get_per_queue_coalesce = mlx5e_get_per_queue_coalesce, + .set_per_queue_coalesce = mlx5e_set_per_queue_coalesce, .get_link_ksettings = mlx5e_get_link_ksettings, .set_link_ksettings = mlx5e_set_link_ksettings, .get_rxfh_key_size = mlx5e_get_rxfh_key_size, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6477b91ff512..8790d57dc6db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -428,6 +428,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .set_channels = mlx5e_rep_set_channels, .get_coalesce = mlx5e_rep_get_coalesce, .set_coalesce = mlx5e_rep_set_coalesce, + .get_per_queue_coalesce = mlx5e_get_per_queue_coalesce, + .set_per_queue_coalesce = mlx5e_set_per_queue_coalesce, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, }; -- cgit v1.2.3 From 93a8540aac72772b5b5c933cea5aae2c88a7777e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Thu, 18 Apr 2024 16:17:49 +0000 Subject: cxgb4: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Only compile tested, no hardware available. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240418161751.189226-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 3a6987cafe59..69d045d769c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -327,6 +327,9 @@ static int cxgb4_validate_flow_match(struct netlink_ext_ack *extack, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; -- cgit v1.2.3 From af7dfa94c2f9a2a2e7a427d1a6287285fd0402b9 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Thu, 18 Apr 2024 16:18:01 +0000 Subject: dpaa2-switch: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. 
In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Reviewed-by: Ioana Ciornei Tested-by: Ioana Ciornei Link: https://lore.kernel.org/r/20240418161802.189247-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index b6a534a3e0b1..701a87370737 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -33,6 +33,9 @@ static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls, acl_h = &acl_key->match; acl_m = &acl_key->mask; + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -548,6 +551,9 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; -- cgit v1.2.3 From 077633afe07f4df8297e93332a4c07934c1f7b64 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Thu, 18 Apr 2024 16:18:15 +0000 Subject: net: ethernet: mtk_eth_soc: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240418161821.189263-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index fbb5e9d5af13..aa262e6f4b85 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -273,6 +273,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; } else { return -EOPNOTSUPP; } -- cgit v1.2.3 From c459f606f66dfdb14aef5a7927c2f4b274682fee Mon Sep 17 00:00:00 2001 From: Chintan Vankar Date: Fri, 19 Apr 2024 13:56:25 +0530 Subject: net: ethernet: ti: am65-cpts: Enable RX HW timestamp for PTP packets using CPTS FIFO Add a new function "am65_cpts_rx_timestamp()" which checks for PTP packets from header and timestamps them. Add another function "am65_cpts_find_rx_ts()" which finds CPTS FIFO Event to get the timestamp of received PTP packet. 
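For reference, a minimal sketch of how a MAC driver's RX completion path is expected to consume the new helper (this mirrors the am65-cpsw-nuss call site added in the follow-up patch and is not part of this change). The helper has to run before eth_type_trans(), which is why it resets the skb MAC header internally before PTP classification:

| /* RX completion handler, before eth_type_trans() */
| if (port->rx_ts_enabled)
|         am65_cpts_rx_timestamp(common->cpts, skb);
| skb->protocol = eth_type_trans(skb, ndev);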
Signed-off-by: Chintan Vankar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpts.c | 84 +++++++++++++++++++++++++++++++++---- drivers/net/ethernet/ti/am65-cpts.h | 6 +++ 2 files changed, 83 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c66618d91c28..599454c1d19f 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -275,15 +275,13 @@ static bool am65_cpts_fifo_pop_event(struct am65_cpts *cpts, return true; } -static int am65_cpts_fifo_read(struct am65_cpts *cpts) +static int __am65_cpts_fifo_read(struct am65_cpts *cpts) { struct ptp_clock_event pevent; struct am65_cpts_event *event; bool schedule = false; int i, type, ret = 0; - unsigned long flags; - spin_lock_irqsave(&cpts->lock, flags); for (i = 0; i < AM65_CPTS_FIFO_DEPTH; i++) { event = list_first_entry_or_null(&cpts->pool, struct am65_cpts_event, list); @@ -312,8 +310,7 @@ static int am65_cpts_fifo_read(struct am65_cpts *cpts) event->tmo = jiffies + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT); - list_del_init(&event->list); - list_add_tail(&event->list, &cpts->events); + list_move_tail(&event->list, &cpts->events); dev_dbg(cpts->dev, "AM65_CPTS_EV_TX e1:%08x e2:%08x t:%lld\n", @@ -356,14 +353,24 @@ static int am65_cpts_fifo_read(struct am65_cpts *cpts) } out: - spin_unlock_irqrestore(&cpts->lock, flags); - if (schedule) ptp_schedule_worker(cpts->ptp_clock, 0); return ret; } +static int am65_cpts_fifo_read(struct am65_cpts *cpts) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cpts->lock, flags); + ret = __am65_cpts_fifo_read(cpts); + spin_unlock_irqrestore(&cpts->lock, flags); + + return ret; +} + static u64 am65_cpts_gettime(struct am65_cpts *cpts, struct ptp_system_timestamp *sts) { @@ -906,6 +913,69 @@ static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) return 1; } +static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) +{ + struct list_head *this, *next; + struct am65_cpts_event *event; + unsigned long flags; + u32 mtype_seqid; + u64 ns = 0; + + spin_lock_irqsave(&cpts->lock, flags); + __am65_cpts_fifo_read(cpts); + list_for_each_safe(this, next, &cpts->events) { + event = list_entry(this, struct am65_cpts_event, list); + if (time_after(jiffies, event->tmo)) { + list_move(&event->list, &cpts->pool); + continue; + } + + mtype_seqid = event->event1 & + (AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK | + AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK | + AM65_CPTS_EVENT_1_EVENT_TYPE_MASK); + + if (mtype_seqid == skb_mtype_seqid) { + ns = event->timestamp; + list_move(&event->list, &cpts->pool); + break; + } + } + spin_unlock_irqrestore(&cpts->lock, flags); + + return ns; +} + +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) +{ + struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + struct skb_shared_hwtstamps *ssh; + int ret; + u64 ns; + + /* am65_cpts_rx_timestamp() is called before eth_type_trans(), so + * skb MAC Hdr properties are not configured yet. 
Hence need to + * reset skb MAC header here + */ + skb_reset_mac_header(skb); + ret = am65_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid); + if (!ret) + return; /* if not PTP class packet */ + + skb_cb->skb_mtype_seqid |= (AM65_CPTS_EV_RX << AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT); + + dev_dbg(cpts->dev, "%s mtype seqid %08x\n", __func__, skb_cb->skb_mtype_seqid); + + ns = am65_cpts_find_rx_ts(cpts, skb_cb->skb_mtype_seqid); + if (!ns) + return; + + ssh = skb_hwtstamps(skb); + memset(ssh, 0, sizeof(*ssh)); + ssh->hwtstamp = ns_to_ktime(ns); +} +EXPORT_SYMBOL_GPL(am65_cpts_rx_timestamp); + /** * am65_cpts_tx_timestamp - save tx packet for timestamping * @cpts: cpts handle diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h index 6e14df0be113..90296968a75c 100644 --- a/drivers/net/ethernet/ti/am65-cpts.h +++ b/drivers/net/ethernet/ti/am65-cpts.h @@ -22,6 +22,7 @@ void am65_cpts_release(struct am65_cpts *cpts); struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, struct device_node *node); int am65_cpts_phc_index(struct am65_cpts *cpts); +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en); @@ -48,6 +49,11 @@ static inline int am65_cpts_phc_index(struct am65_cpts *cpts) return -1; } +static inline void am65_cpts_rx_timestamp(struct am65_cpts *cpts, + struct sk_buff *skb) +{ +} + static inline void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) { -- cgit v1.2.3 From c03a6fd398269e345ee808cdced311fd01215c18 Mon Sep 17 00:00:00 2001 From: Chintan Vankar Date: Fri, 19 Apr 2024 13:56:26 +0530 Subject: net: ethernet: ti: am65-cpsw/ethtool: Enable RX HW timestamp only for PTP packets In the current mechanism of timestamping, am65-cpsw-nuss driver enables hardware timestamping for all received packets by setting the TSTAMP_EN bit in CPTS_CONTROL register, which directs the CPTS module to timestamp all received packets, followed by passing timestamp via DMA descriptors. This mechanism causes CPSW Port to Lock up. To prevent port lock up, don't enable rx packet timestamping by setting TSTAMP_EN bit in CPTS_CONTROL register. The workaround for timestamping received packets is to utilize the CPTS Event FIFO that records timestamps corresponding to certain events. The CPTS module is configured to generate timestamps for Multicast Ethernet, UDP/IPv4 and UDP/IPv6 PTP packets. Update supported hwtstamp_rx_filters values for CPSW's timestamping capability. 
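For illustration only, a hypothetical userspace fragment (not part of this patch; it assumes "eth0" as a placeholder interface name, fd is an open AF_INET socket, and the usual <linux/net_tstamp.h>, <linux/sockios.h>, <net/if.h> headers) showing the user-visible effect: HWTSTAMP_FILTER_ALL is now rejected with -EOPNOTSUPP, while PTP v2 filters are accepted and reported back as HWTSTAMP_FILTER_PTP_V2_EVENT:

| struct hwtstamp_config cfg = {
|         .tx_type   = HWTSTAMP_TX_ON,
|         .rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT,
| };
| struct ifreq ifr = { 0 };
|
| strncpy(ifr.ifr_name, "eth0", sizeof(ifr.ifr_name) - 1);
| ifr.ifr_data = (char *)&cfg;
| if (ioctl(fd, SIOCSHWTSTAMP, &ifr) == 0)
|         /* the driver writes the effective filter back into cfg */
|         printf("rx_filter: %d\n", cfg.rx_filter); /* HWTSTAMP_FILTER_PTP_V2_EVENT */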
Fixes: b1f66a5bee07 ("net: ethernet: ti: am65-cpsw-nuss: enable packet timestamping support") Signed-off-by: Chintan Vankar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 13 +++++++- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 51 +++++++++++++---------------- drivers/net/ethernet/ti/am65-cpts.c | 23 ------------- drivers/net/ethernet/ti/am65-cpts.h | 5 --- 4 files changed, 35 insertions(+), 57 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index d6ce2c9f0a8d..a1d0935d1ebe 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -695,6 +695,17 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + unsigned int ptp_v2_filter; + + ptp_v2_filter = BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) return ethtool_op_get_ts_info(ndev, info); @@ -708,7 +719,7 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = am65_cpts_phc_index(common->cpts); info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | ptp_v2_filter; return 0; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index a7dbbbe3ff5d..81a0fe1f1045 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -103,6 +103,12 @@ #define AM65_CPSW_PN_TS_CTL_TX_HOST_TS_EN BIT(11) #define AM65_CPSW_PN_TS_CTL_MSG_TYPE_EN_SHIFT 16 +#define AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN BIT(0) +#define AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN BIT(1) +#define AM65_CPSW_PN_TS_CTL_RX_VLAN_LT2_EN BIT(2) +#define AM65_CPSW_PN_TS_CTL_RX_ANX_D_EN BIT(3) +#define AM65_CPSW_PN_TS_CTL_RX_ANX_E_EN BIT(9) + /* AM65_CPSW_PORTN_REG_TS_SEQ_LTYPE_REG register fields */ #define AM65_CPSW_PN_TS_SEQ_ID_OFFSET_SHIFT 16 @@ -126,6 +132,11 @@ AM65_CPSW_PN_TS_CTL_TX_ANX_E_EN | \ AM65_CPSW_PN_TS_CTL_TX_ANX_F_EN) +#define AM65_CPSW_TS_RX_ANX_ALL_EN \ + (AM65_CPSW_PN_TS_CTL_RX_ANX_D_EN | \ + AM65_CPSW_PN_TS_CTL_RX_ANX_E_EN | \ + AM65_CPSW_PN_TS_CTL_RX_ANX_F_EN) + #define AM65_CPSW_ALE_AGEOUT_DEFAULT 30 /* Number of TX/RX descriptors */ #define AM65_CPSW_MAX_TX_DESC 500 @@ -1050,18 +1061,6 @@ out: return ret; } -static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata) -{ - struct skb_shared_hwtstamps *ssh; - u64 ns; - - ns = ((u64)psdata[1] << 32) | psdata[0]; - - ssh = skb_hwtstamps(skb); - memset(ssh, 0, sizeof(*ssh)); - ssh->hwtstamp = ns_to_ktime(ns); -} - /* RX psdata[2] word format - checksum information */ #define AM65_CPSW_RX_PSD_CSUM_ADD GENMASK(15, 0) #define AM65_CPSW_RX_PSD_CSUM_ERR BIT(16) @@ -1177,13 +1176,11 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, skb_reserve(skb, headroom); } - /* Pass skb to netstack if no XDP prog or returned XDP_PASS */ - if (port->rx_ts_enabled) - am65_cpsw_nuss_rx_ts(skb, psdata); - ndev_priv = 
netdev_priv(ndev); am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); + if (port->rx_ts_enabled) + am65_cpts_rx_timestamp(common->cpts, skb); skb_mark_for_recycle(skb); skb->protocol = eth_type_trans(skb, ndev); am65_cpsw_nuss_rx_csum(skb, csum_info); @@ -1736,7 +1733,6 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev, static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) { - struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); u32 ts_ctrl, seq_id, ts_ctrl_ltype2, ts_vlan_ltype; struct hwtstamp_config cfg; @@ -1760,11 +1756,6 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_NONE: port->rx_ts_enabled = false; break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1774,10 +1765,13 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: port->rx_ts_enabled = true; - cfg.rx_filter = HWTSTAMP_FILTER_ALL; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -EOPNOTSUPP; default: return -ERANGE; } @@ -1807,6 +1801,10 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, ts_ctrl |= AM65_CPSW_TS_TX_ANX_ALL_EN | AM65_CPSW_PN_TS_CTL_TX_VLAN_LT1_EN; + if (port->rx_ts_enabled) + ts_ctrl |= AM65_CPSW_TS_RX_ANX_ALL_EN | + AM65_CPSW_PN_TS_CTL_RX_VLAN_LT1_EN; + writel(seq_id, port->port_base + AM65_CPSW_PORTN_REG_TS_SEQ_LTYPE_REG); writel(ts_vlan_ltype, port->port_base + AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG); @@ -1814,9 +1812,6 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2); writel(ts_ctrl, port->port_base + AM65_CPSW_PORTN_REG_TS_CTL); - /* en/dis RX timestamp */ - am65_cpts_rx_enable(common->cpts, port->rx_ts_enabled); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } @@ -1833,7 +1828,7 @@ static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, cfg.tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; cfg.rx_filter = port->rx_ts_enabled ? - HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE; return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 599454c1d19f..68891a227dc8 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -866,29 +866,6 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp) return delay; } -/** - * am65_cpts_rx_enable - enable rx timestamping - * @cpts: cpts handle - * @en: enable - * - * This functions enables rx packets timestamping. The CPTS can timestamp all - * rx packets. 
- */ -void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en) -{ - u32 val; - - mutex_lock(&cpts->ptp_clk_lock); - val = am65_cpts_read32(cpts, control); - if (en) - val |= AM65_CPTS_CONTROL_TSTAMP_EN; - else - val &= ~AM65_CPTS_CONTROL_TSTAMP_EN; - am65_cpts_write32(cpts, val, control); - mutex_unlock(&cpts->ptp_clk_lock); -} -EXPORT_SYMBOL_GPL(am65_cpts_rx_enable); - static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid) { unsigned int ptp_class = ptp_classify_raw(skb); diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h index 90296968a75c..6099d772799d 100644 --- a/drivers/net/ethernet/ti/am65-cpts.h +++ b/drivers/net/ethernet/ti/am65-cpts.h @@ -25,7 +25,6 @@ int am65_cpts_phc_index(struct am65_cpts *cpts); void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); -void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en); u64 am65_cpts_ns_gettime(struct am65_cpts *cpts); int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx, struct am65_cpts_estf_cfg *cfg); @@ -64,10 +63,6 @@ static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, { } -static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en) -{ -} - static inline s64 am65_cpts_ns_gettime(struct am65_cpts *cpts) { return 0; -- cgit v1.2.3 From dc144baeb4fbfa0d91ce9c3875307566f58704ec Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 19 Apr 2024 12:03:05 +0300 Subject: net: stmmac: Rename phylink_get_caps() callback to update_caps() Since recent commits the stmmac_ops::phylink_get_caps() callback is no longer responsible for getting the phylink MAC capabilities; it merely updates the MAC capabilities in the mac_device_info::link::caps field. Rename the callback to reflect what the method does now.
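
For illustration, here is a condensed sketch of the renamed hook and one of its callers, pieced together from the hunks below (not a complete or buildable listing): the callback only adjusts priv->hw->link.caps in place, and callers then pick the result up from that field.

| static void dwmac4_update_caps(struct stmmac_priv *priv)
| {
|	/* half-duplex modes are not usable with more than one TX queue */
|	if (priv->plat->tx_queues_to_use > 1)
|		priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
| }
|
|	/* a caller, e.g. stmmac_phy_setup(): */
|	stmmac_mac_update_caps(priv);
|	priv->phylink_config.mac_capabilities = priv->hw->link.caps;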
Signed-off-by: Serge Semin Reviewed-by: Romain Gantois Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/hwif.h | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index a38226d7cc6a..b25774d69195 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -68,7 +68,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, init_waitqueue_head(&priv->tstamp_busy_wait); } -static void dwmac4_phylink_get_caps(struct stmmac_priv *priv) +static void dwmac4_update_caps(struct stmmac_priv *priv) { if (priv->plat->tx_queues_to_use > 1) priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD); @@ -1190,7 +1190,7 @@ static void dwmac4_set_hw_vlan_mode(struct mac_device_info *hw) const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, - .phylink_get_caps = dwmac4_phylink_get_caps, + .update_caps = dwmac4_update_caps, .set_mac = stmmac_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, .rx_queue_enable = dwmac4_rx_queue_enable, @@ -1235,7 +1235,7 @@ const struct stmmac_ops dwmac4_ops = { const struct stmmac_ops dwmac410_ops = { .core_init = dwmac4_core_init, - .phylink_get_caps = dwmac4_phylink_get_caps, + .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, .rx_queue_enable = dwmac4_rx_queue_enable, @@ -1284,7 +1284,7 @@ const struct stmmac_ops dwmac410_ops = { const struct stmmac_ops dwmac510_ops = { .core_init = dwmac4_core_init, - .phylink_get_caps = dwmac4_phylink_get_caps, + .update_caps = dwmac4_update_caps, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, .rx_queue_enable = dwmac4_rx_queue_enable, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 7be04b54738b..90384db228b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -308,8 +308,8 @@ struct stmmac_est; struct stmmac_ops { /* MAC core initialization */ void (*core_init)(struct mac_device_info *hw, struct net_device *dev); - /* Get phylink capabilities */ - void (*phylink_get_caps)(struct stmmac_priv *priv); + /* Update MAC capabilities */ + void (*update_caps)(struct stmmac_priv *priv); /* Enable the MAC RX/TX */ void (*set_mac)(void __iomem *ioaddr, bool enable); /* Enable and verify that the IPC module is supported */ @@ -430,8 +430,8 @@ struct stmmac_ops { #define stmmac_core_init(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, core_init, __args) -#define stmmac_mac_phylink_get_caps(__priv) \ - stmmac_do_void_callback(__priv, mac, phylink_get_caps, __priv) +#define stmmac_mac_update_caps(__priv) \ + stmmac_do_void_callback(__priv, mac, update_caps, __priv) #define stmmac_mac_set(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_mac, __args) #define stmmac_rx_ipc(__priv, __args...) 
\ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2ea9f0fa0cf9..98067bd371b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1225,8 +1225,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) xpcs_get_interfaces(priv->hw->xpcs, priv->phylink_config.supported_interfaces); - /* Get the MAC specific capabilities */ - stmmac_mac_phylink_get_caps(priv); + /* Refresh the MAC-specific capabilities */ + stmmac_mac_update_caps(priv); priv->phylink_config.mac_capabilities = priv->hw->link.caps; @@ -7343,7 +7343,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->rss.table[i] = ethtool_rxfh_indir_default(i, rx_cnt); - stmmac_mac_phylink_get_caps(priv); + stmmac_mac_update_caps(priv); priv->phylink_config.mac_capabilities = priv->hw->link.caps; -- cgit v1.2.3 From f951a64922a8576975673c42985a89a798e19b4e Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 19 Apr 2024 12:03:06 +0300 Subject: net: stmmac: Move MAC caps init to phylink MAC caps getter After a set of recent fixes the stmmac_phy_setup() and stmmac_reinit_queues() methods have ended up containing some duplicated code. Let's get rid of the duplication by moving the MAC-capabilities initialization to the PHYLINK MAC-capabilities getter. The getter is called during each network device interface open/close cycle. So the MAC capabilities will be initialized both in the generic device open procedure and on Tx/Rx queue re-initialization, as the original code semantics implies. Signed-off-by: Serge Semin Reviewed-by: Romain Gantois Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 36 +++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 98067bd371b4..59bf83904b62 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -936,6 +936,22 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) priv->pause, tx_cnt); } +static unsigned long stmmac_mac_get_caps(struct phylink_config *config, + phy_interface_t interface) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + /* Refresh the MAC-specific capabilities */ + stmmac_mac_update_caps(priv); + + config->mac_capabilities = priv->hw->link.caps; + + if (priv->plat->max_speed) + phylink_limit_mac_speed(config, priv->plat->max_speed); + + return config->mac_capabilities; +} + static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { @@ -1102,6 +1118,7 @@ static void stmmac_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops stmmac_phylink_mac_ops = { + .mac_get_caps = stmmac_mac_get_caps, .mac_select_pcs = stmmac_mac_select_pcs, .mac_config = stmmac_mac_config, .mac_link_down = stmmac_mac_link_down, @@ -1201,7 +1218,6 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) int mode = priv->plat->phy_interface; struct fwnode_handle *fwnode; struct phylink *phylink; - int max_speed; priv->phylink_config.dev = &priv->dev->dev; priv->phylink_config.type = PHYLINK_NETDEV; @@ -1225,15 +1241,6 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) xpcs_get_interfaces(priv->hw->xpcs, priv->phylink_config.supported_interfaces); - /* Refresh
the MAC-specific capabilities */ - stmmac_mac_update_caps(priv); - - priv->phylink_config.mac_capabilities = priv->hw->link.caps; - - max_speed = priv->plat->max_speed; - if (max_speed) - phylink_limit_mac_speed(&priv->phylink_config, max_speed); - fwnode = priv->plat->port_node; if (!fwnode) fwnode = dev_fwnode(priv->device); @@ -7329,7 +7336,6 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) { struct stmmac_priv *priv = netdev_priv(dev); int ret = 0, i; - int max_speed; if (netif_running(dev)) stmmac_release(dev); @@ -7343,14 +7349,6 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->rss.table[i] = ethtool_rxfh_indir_default(i, rx_cnt); - stmmac_mac_update_caps(priv); - - priv->phylink_config.mac_capabilities = priv->hw->link.caps; - - max_speed = priv->plat->max_speed; - if (max_speed) - phylink_limit_mac_speed(&priv->phylink_config, max_speed); - stmmac_napi_add(dev); if (netif_running(dev)) -- cgit v1.2.3 From ec24c06eb3126350cea6699eddb41d01a0134818 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Apr 2024 05:38:57 -0700 Subject: net: marvell: prestera: allocate dummy net_device dynamically Embedding net_device into structures prohibits the usage of flexible arrays in the net_device structure. For more details, see the discussion at [1]. Un-embed the net_device from the private struct by converting it into a pointer. Then use the leverage the new alloc_netdev_dummy() helper to allocate and initialize dummy devices. [1] https://lore.kernel.org/all/20240229225910.79e224cf@kernel.org/ Signed-off-by: Breno Leitao Acked-by: Elad Nachman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_rxtx.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c index cc2a9ae794be..39d9bf82c115 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c @@ -96,7 +96,7 @@ struct prestera_sdma { struct dma_pool *desc_pool; struct work_struct tx_work; struct napi_struct rx_napi; - struct net_device napi_dev; + struct net_device *napi_dev; u32 map_addr; u64 dma_mask; /* protect SDMA with concurrent access from multiple CPUs */ @@ -654,13 +654,21 @@ static int prestera_sdma_switch_init(struct prestera_switch *sw) if (err) goto err_evt_register; - init_dummy_netdev(&sdma->napi_dev); + sdma->napi_dev = alloc_netdev_dummy(0); + if (!sdma->napi_dev) { + dev_err(dev, "not able to initialize dummy device\n"); + err = -ENOMEM; + goto err_alloc_dummy; + } - netif_napi_add(&sdma->napi_dev, &sdma->rx_napi, prestera_sdma_rx_poll); + netif_napi_add(sdma->napi_dev, &sdma->rx_napi, prestera_sdma_rx_poll); napi_enable(&sdma->rx_napi); return 0; +err_alloc_dummy: + prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_RXTX, + prestera_rxtx_handle_event); err_evt_register: err_tx_init: prestera_sdma_tx_fini(sdma); @@ -677,6 +685,7 @@ static void prestera_sdma_switch_fini(struct prestera_switch *sw) napi_disable(&sdma->rx_napi); netif_napi_del(&sdma->rx_napi); + free_netdev(sdma->napi_dev); prestera_hw_event_handler_unregister(sw, PRESTERA_EVENT_TYPE_RXTX, prestera_rxtx_handle_event); prestera_sdma_tx_fini(sdma); -- cgit v1.2.3 From b209bd6d0bffb8991aba568e2d9a892c86a1a43c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Apr 2024 05:38:58 -0700 Subject: net: mediatek: 
mtk_eth_sock: allocate dummy net_device dynamically Embedding net_device into structures prohibits the usage of flexible arrays in the net_device structure. For more details, see the discussion at [1]. Un-embed the net_device from the private struct by converting it into a pointer. Then use the leverage the new alloc_netdev_dummy() helper to allocate and initialize dummy devices. [1] https://lore.kernel.org/all/20240229225910.79e224cf@kernel.org/ Signed-off-by: Breno Leitao Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 +++++++++++++---- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index caa13b9cedff..d7a96dc11c07 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1710,7 +1710,7 @@ static struct page_pool *mtk_create_page_pool(struct mtk_eth *eth, if (IS_ERR(pp)) return pp; - err = __xdp_rxq_info_reg(xdp_q, ð->dummy_dev, id, + err = __xdp_rxq_info_reg(xdp_q, eth->dummy_dev, id, eth->rx_napi.napi_id, PAGE_SIZE); if (err < 0) goto err_free_pp; @@ -4188,6 +4188,8 @@ static int mtk_free_dev(struct mtk_eth *eth) metadata_dst_free(eth->dsa_meta[i]); } + free_netdev(eth->dummy_dev); + return 0; } @@ -4983,9 +4985,14 @@ static int mtk_probe(struct platform_device *pdev) /* we run 2 devices on the same DMA ring so we need a dummy device * for NAPI to work */ - init_dummy_netdev(ð->dummy_dev); - netif_napi_add(ð->dummy_dev, ð->tx_napi, mtk_napi_tx); - netif_napi_add(ð->dummy_dev, ð->rx_napi, mtk_napi_rx); + eth->dummy_dev = alloc_netdev_dummy(0); + if (!eth->dummy_dev) { + err = -ENOMEM; + dev_err(eth->dev, "failed to allocated dummy device\n"); + goto err_unreg_netdev; + } + netif_napi_add(eth->dummy_dev, ð->tx_napi, mtk_napi_tx); + netif_napi_add(eth->dummy_dev, ð->rx_napi, mtk_napi_rx); platform_set_drvdata(pdev, eth); schedule_delayed_work(ð->reset.monitor_work, @@ -4993,6 +5000,8 @@ static int mtk_probe(struct platform_device *pdev) return 0; +err_unreg_netdev: + mtk_unreg_dev(eth); err_deinit_ppe: mtk_ppe_deinit(eth); mtk_mdio_cleanup(eth); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 9ae3b8a71d0e..723fc637027c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1242,7 +1242,7 @@ struct mtk_eth { spinlock_t page_lock; spinlock_t tx_irq_lock; spinlock_t rx_irq_lock; - struct net_device dummy_dev; + struct net_device *dummy_dev; struct net_device *netdev[MTK_MAX_DEVS]; struct mtk_mac *mac[MTK_MAX_DEVS]; int irq[3]; -- cgit v1.2.3 From 2eb5e25d84956c264dc03fc45deb3701f3a0ccea Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Apr 2024 05:39:00 -0700 Subject: net: ibm/emac: allocate dummy net_device dynamically Embedding net_device into structures prohibits the usage of flexible arrays in the net_device structure. For more details, see the discussion at [1]. Un-embed the net_device from the private struct by converting it into a pointer. Then use the leverage the new alloc_netdev_dummy() helper to allocate and initialize dummy devices. [1] https://lore.kernel.org/all/20240229225910.79e224cf@kernel.org/ Signed-off-by: Breno Leitao Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/emac/mal.c | 14 +++++++++++--- drivers/net/ethernet/ibm/emac/mal.h | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 2439f7e96e05..d92dd9c83031 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -605,9 +605,13 @@ static int mal_probe(struct platform_device *ofdev) INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - init_dummy_netdev(&mal->dummy_dev); + mal->dummy_dev = alloc_netdev_dummy(0); + if (!mal->dummy_dev) { + err = -ENOMEM; + goto fail_unmap; + } - netif_napi_add_weight(&mal->dummy_dev, &mal->napi, mal_poll, + netif_napi_add_weight(mal->dummy_dev, &mal->napi, mal_poll, CONFIG_IBM_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ @@ -637,7 +641,7 @@ static int mal_probe(struct platform_device *ofdev) GFP_KERNEL); if (mal->bd_virt == NULL) { err = -ENOMEM; - goto fail_unmap; + goto fail_dummy; } for (i = 0; i < mal->num_tx_chans; ++i) @@ -703,6 +707,8 @@ static int mal_probe(struct platform_device *ofdev) free_irq(mal->serr_irq, mal); fail2: dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); + fail_dummy: + free_netdev(mal->dummy_dev); fail_unmap: dcr_unmap(mal->dcr_host, 0x100); fail: @@ -734,6 +740,8 @@ static void mal_remove(struct platform_device *ofdev) mal_reset(mal); + free_netdev(mal->dummy_dev); + dma_free_coherent(&ofdev->dev, sizeof(struct mal_descriptor) * (NUM_TX_BUFF * mal->num_tx_chans + diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h index d212373a72e7..e0ddc41186a2 100644 --- a/drivers/net/ethernet/ibm/emac/mal.h +++ b/drivers/net/ethernet/ibm/emac/mal.h @@ -205,7 +205,7 @@ struct mal_instance { int index; spinlock_t lock; - struct net_device dummy_dev; + struct net_device *dummy_dev; unsigned int features; }; -- cgit v1.2.3 From 8c82bfdd84998c2d78dda0f4a32ecc7b91f7ea8d Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Sat, 20 Apr 2024 21:29:10 +0200 Subject: net: sparx5: add new register definitions In preparation for port mirroring support through tc matchall, add the required register definitions. Signed-off-by: Daniel Machon Reviewed-by: Steen Hegelund Signed-off-by: David S. 
Miller --- .../ethernet/microchip/sparx5/sparx5_main_regs.h | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h index bd03a0a3c1da..22acc1f3380c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h @@ -83,6 +83,64 @@ enum sparx5_target { #define ANA_AC_OWN_UPSID_OWN_UPSID_GET(x)\ FIELD_GET(ANA_AC_OWN_UPSID_OWN_UPSID, x) +/* ANA_AC:MIRROR_PROBE:PROBE_CFG */ +#define ANA_AC_PROBE_CFG(g) \ + __REG(TARGET_ANA_AC, 0, 1, 893696, g, 3, 32, 0, 0, 1, 4) + +#define ANA_AC_PROBE_CFG_PROBE_RX_CPU_AND_VD GENMASK(31, 27) +#define ANA_AC_PROBE_CFG_PROBE_RX_CPU_AND_VD_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_RX_CPU_AND_VD, x) +#define ANA_AC_PROBE_CFG_PROBE_RX_CPU_AND_VD_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_RX_CPU_AND_VD, x) + +#define ANA_AC_PROBE_CFG_PROBE_CPU_SET GENMASK(26, 19) +#define ANA_AC_PROBE_CFG_PROBE_CPU_SET_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_CPU_SET, x) +#define ANA_AC_PROBE_CFG_PROBE_CPU_SET_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_CPU_SET, x) + +#define ANA_AC_PROBE_CFG_PROBE_VID GENMASK(18, 6) +#define ANA_AC_PROBE_CFG_PROBE_VID_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_VID, x) +#define ANA_AC_PROBE_CFG_PROBE_VID_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_VID, x) + +#define ANA_AC_PROBE_CFG_PROBE_VLAN_MODE GENMASK(5, 4) +#define ANA_AC_PROBE_CFG_PROBE_VLAN_MODE_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_VLAN_MODE, x) +#define ANA_AC_PROBE_CFG_PROBE_VLAN_MODE_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_VLAN_MODE, x) + +#define ANA_AC_PROBE_CFG_PROBE_MAC_MODE GENMASK(3, 2) +#define ANA_AC_PROBE_CFG_PROBE_MAC_MODE_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_MAC_MODE, x) +#define ANA_AC_PROBE_CFG_PROBE_MAC_MODE_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_MAC_MODE, x) + +#define ANA_AC_PROBE_CFG_PROBE_DIRECTION GENMASK(1, 0) +#define ANA_AC_PROBE_CFG_PROBE_DIRECTION_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_CFG_PROBE_DIRECTION, x) +#define ANA_AC_PROBE_CFG_PROBE_DIRECTION_GET(x)\ + FIELD_GET(ANA_AC_PROBE_CFG_PROBE_DIRECTION, x) + +/* ANA_AC:MIRROR_PROBE:PROBE_PORT_CFG */ +#define ANA_AC_PROBE_PORT_CFG(g) \ + __REG(TARGET_ANA_AC, 0, 1, 893696, g, 3, 32, 8, 0, 1, 4) + +/* ANA_AC:MIRROR_PROBE:PROBE_PORT_CFG1 */ +#define ANA_AC_PROBE_PORT_CFG1(g) \ + __REG(TARGET_ANA_AC, 0, 1, 893696, g, 3, 32, 12, 0, 1, 4) + +/* ANA_AC:MIRROR_PROBE:PROBE_PORT_CFG2 */ +#define ANA_AC_PROBE_PORT_CFG2(g) \ + __REG(TARGET_ANA_AC, 0, 1, 893696, g, 3, 32, 16, 0, 1, 4) + +#define ANA_AC_PROBE_PORT_CFG2_PROBE_PORT_MASK2 BIT(0) +#define ANA_AC_PROBE_PORT_CFG2_PROBE_PORT_MASK2_SET(x)\ + FIELD_PREP(ANA_AC_PROBE_PORT_CFG2_PROBE_PORT_MASK2, x) +#define ANA_AC_PROBE_PORT_CFG2_PROBE_PORT_MASK2_GET(x)\ + FIELD_GET(ANA_AC_PROBE_PORT_CFG2_PROBE_PORT_MASK2, x) + /* ANA_AC:SRC:SRC_CFG */ #define ANA_AC_SRC_CFG(g) __REG(TARGET_ANA_AC,\ 0, 1, 849920, g, 102, 16, 0, 0, 1, 4) @@ -6203,6 +6261,16 @@ enum sparx5_target { #define QFWD_SWITCH_PORT_MODE_LEARNALL_MORE_GET(x)\ FIELD_GET(QFWD_SWITCH_PORT_MODE_LEARNALL_MORE, x) +/* QFWD:SYSTEM:FRAME_COPY_CFG */ +#define QFWD_FRAME_COPY_CFG(r)\ + __REG(TARGET_QFWD, 0, 1, 0, 0, 1, 340, 284, r, 12, 4) + +#define QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL GENMASK(12, 6) +#define QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL_SET(x)\ + FIELD_PREP(QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL, x) +#define QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL_GET(x)\ + 
FIELD_GET(QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL, x) + /* QRES:RES_CTRL:RES_CFG */ #define QRES_RES_CFG(g) __REG(TARGET_QRES,\ 0, 1, 0, g, 5120, 16, 0, 0, 1, 4) -- cgit v1.2.3 From 1ede4acf045ced70d9b72465e2e968aec71e0a42 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Sat, 20 Apr 2024 21:29:11 +0200 Subject: net: sparx5: add bookkeeping code for matchall rules In preparation for new tc matchall rules, we add a bit of bookkeeping code to keep track of them. The rules are identified by the cookie passed from the tc stack. Signed-off-by: Daniel Machon Reviewed-by: Steen Hegelund Signed-off-by: David S. Miller --- .../net/ethernet/microchip/sparx5/sparx5_main.c | 3 ++ .../net/ethernet/microchip/sparx5/sparx5_main.h | 10 ++++ .../ethernet/microchip/sparx5/sparx5_tc_matchall.c | 62 +++++++++++++++++++--- 3 files changed, 67 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 3c066b62e689..b64c814eac11 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -899,6 +899,9 @@ static int mchp_sparx5_probe(struct platform_device *pdev) dev_err(sparx5->dev, "PTP failed\n"); goto cleanup_ports; } + + INIT_LIST_HEAD(&sparx5->mall_entries); + goto cleanup_config; cleanup_ports: diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 316fed5f2735..4de37a5387a4 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "sparx5_main_regs.h" @@ -227,6 +228,14 @@ struct sparx5_mdb_entry { u16 pgid_idx; }; +struct sparx5_mall_entry { + struct list_head list; + struct sparx5_port *port; + unsigned long cookie; + enum flow_action_id type; + bool ingress; +}; + #define SPARX5_PTP_TIMEOUT msecs_to_jiffies(10) #define SPARX5_SKB_CB(skb) \ ((struct sparx5_skb_cb *)((skb)->cb)) @@ -295,6 +304,7 @@ struct sparx5 { struct vcap_control *vcap_ctrl; /* PGID allocation map */ u8 pgid_map[PGID_TABLE_SIZE]; + struct list_head mall_entries; /* Common root for debugfs */ struct dentry *debugfs_root; }; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c index d88a93f22606..2a33b347098f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c @@ -11,11 +11,37 @@ #include "sparx5_main.h" #include "sparx5_vcap_impl.h" +static struct sparx5_mall_entry * +sparx5_tc_matchall_entry_find(struct list_head *entries, unsigned long cookie) +{ + struct sparx5_mall_entry *entry; + + list_for_each_entry(entry, entries, list) { + if (entry->cookie == cookie) + return entry; + } + + return NULL; +} + +static void sparx5_tc_matchall_parse_action(struct sparx5_port *port, + struct sparx5_mall_entry *entry, + struct flow_action_entry *action, + bool ingress, + unsigned long cookie) +{ + entry->port = port; + entry->type = action->id; + entry->ingress = ingress; + entry->cookie = cookie; +} + static int sparx5_tc_matchall_replace(struct net_device *ndev, struct tc_cls_matchall_offload *tmo, bool ingress) { struct sparx5_port *port = netdev_priv(ndev); + struct sparx5_mall_entry *mall_entry; struct flow_action_entry *action; struct sparx5 *sparx5; int err; @@ -27,6 +53,16 @@ static int 
sparx5_tc_matchall_replace(struct net_device *ndev, } action = &tmo->rule->action.entries[0]; + mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL); + if (!mall_entry) + return -ENOMEM; + + sparx5_tc_matchall_parse_action(port, + mall_entry, + action, + ingress, + tmo->cookie); + sparx5 = port->sparx5; switch (action->id) { case FLOW_ACTION_GOTO: @@ -59,6 +95,9 @@ static int sparx5_tc_matchall_replace(struct net_device *ndev, NL_SET_ERR_MSG_MOD(tmo->common.extack, "Unsupported action"); return -EOPNOTSUPP; } + + list_add_tail(&mall_entry->list, &sparx5->mall_entries); + return 0; } @@ -67,19 +106,26 @@ static int sparx5_tc_matchall_destroy(struct net_device *ndev, bool ingress) { struct sparx5_port *port = netdev_priv(ndev); - struct sparx5 *sparx5; + struct sparx5 *sparx5 = port->sparx5; + struct sparx5_mall_entry *entry; int err; - sparx5 = port->sparx5; - if (!tmo->rule && tmo->cookie) { + entry = sparx5_tc_matchall_entry_find(&sparx5->mall_entries, + tmo->cookie); + if (!entry) + return -ENOENT; + + if (entry->type == FLOW_ACTION_GOTO) { err = vcap_enable_lookups(sparx5->vcap_ctrl, ndev, 0, 0, tmo->cookie, false); - if (err) - return err; - return 0; + } else { + NL_SET_ERR_MSG_MOD(tmo->common.extack, "Unsupported action"); + err = -EOPNOTSUPP; } - NL_SET_ERR_MSG_MOD(tmo->common.extack, "Unsupported action"); - return -EOPNOTSUPP; + + list_del(&entry->list); + + return err; } int sparx5_tc_matchall(struct net_device *ndev, -- cgit v1.2.3 From 4e50d72b3b95fd1ba678f1fab395c3f45105d6bd Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Sat, 20 Apr 2024 21:29:12 +0200 Subject: net: sparx5: add port mirroring implementation The hardware supports three independent mirroring probes. Each probe can be configured to mirror rx or tx traffic (direction). Using tc matchall, it is now possible to add a source port and a monitor port to a mirror probe. Depending on the mirror direction, rx or tx traffic from a source port will be mirrored to the monitor port. A single source port can be a member of multiple mirror probes. Signed-off-by: Daniel Machon Reviewed-by: Steen Hegelund Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/sparx5/Makefile | 3 +- .../net/ethernet/microchip/sparx5/sparx5_main.h | 12 ++ .../net/ethernet/microchip/sparx5/sparx5_mirror.c | 198 +++++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index 1cb1cc3f1a85..b68fe9c9a656 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -10,7 +10,8 @@ sparx5-switch-y := sparx5_main.o sparx5_packet.o \ sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o \ sparx5_ptp.o sparx5_pgid.o sparx5_tc.o sparx5_qos.o \ sparx5_vcap_impl.o sparx5_vcap_ag_api.o sparx5_tc_flower.o \ - sparx5_tc_matchall.o sparx5_pool.o sparx5_sdlb.o sparx5_police.o sparx5_psfp.o + sparx5_tc_matchall.o sparx5_pool.o sparx5_sdlb.o sparx5_police.o \ + sparx5_psfp.o sparx5_mirror.o sparx5-switch-$(CONFIG_SPARX5_DCB) += sparx5_dcb.o sparx5-switch-$(CONFIG_DEBUG_FS) += sparx5_vcap_debugfs.o diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 4de37a5387a4..5d026e1670f5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -228,12 +228,20 @@ struct sparx5_mdb_entry { u16 pgid_idx; }; +struct sparx5_mall_mirror_entry { + u32 idx; + struct sparx5_port *port; +}; + struct sparx5_mall_entry { struct list_head list; struct sparx5_port *port; unsigned long cookie; enum flow_action_id type; bool ingress; + union { + struct sparx5_mall_mirror_entry mirror; + }; }; #define SPARX5_PTP_TIMEOUT msecs_to_jiffies(10) @@ -551,6 +559,10 @@ void sparx5_psfp_init(struct sparx5 *sparx5); void sparx5_new_base_time(struct sparx5 *sparx5, const u32 cycle_time, const ktime_t org_base_time, ktime_t *new_base_time); +/* sparx5_mirror.c */ +int sparx5_mirror_add(struct sparx5_mall_entry *entry); +void sparx5_mirror_del(struct sparx5_mall_entry *entry); + /* Clock period in picoseconds */ static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock) { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c new file mode 100644 index 000000000000..f029634a6a42 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip Sparx5 Switch driver + * + * Copyright (c) 2024 Microchip Technology Inc. and its subsidiaries. + */ + +#include "sparx5_main.h" +#include "sparx5_main_regs.h" +#include "sparx5_tc.h" + +#define SPX5_MIRROR_PROBE_MAX 3 +#define SPX5_MIRROR_DISABLED 0 +#define SPX5_MIRROR_EGRESS 1 +#define SPX5_MIRROR_INGRESS 2 +#define SPX5_MIRROR_MONITOR_PORT_DEFAULT 65 +#define SPX5_QFWD_MP_OFFSET 9 /* Mirror port offset in the QFWD register */ + +/* Convert from bool ingress/egress to mirror direction */ +static u32 sparx5_mirror_to_dir(bool ingress) +{ + return ingress ? 
SPX5_MIRROR_INGRESS : SPX5_MIRROR_EGRESS; +} + +/* Get ports belonging to this mirror */ +static u64 sparx5_mirror_port_get(struct sparx5 *sparx5, u32 idx) +{ + return (u64)spx5_rd(sparx5, ANA_AC_PROBE_PORT_CFG1(idx)) << 32 | + spx5_rd(sparx5, ANA_AC_PROBE_PORT_CFG(idx)); +} + +/* Add port to mirror (only front ports) */ +static void sparx5_mirror_port_add(struct sparx5 *sparx5, u32 idx, u32 portno) +{ + u32 val, reg = portno; + + reg = portno / BITS_PER_BYTE; + val = BIT(portno % BITS_PER_BYTE); + + if (reg == 0) + return spx5_rmw(val, val, sparx5, ANA_AC_PROBE_PORT_CFG(idx)); + else + return spx5_rmw(val, val, sparx5, ANA_AC_PROBE_PORT_CFG1(idx)); +} + +/* Delete port from mirror (only front ports) */ +static void sparx5_mirror_port_del(struct sparx5 *sparx5, u32 idx, u32 portno) +{ + u32 val, reg = portno; + + reg = portno / BITS_PER_BYTE; + val = BIT(portno % BITS_PER_BYTE); + + if (reg == 0) + return spx5_rmw(0, val, sparx5, ANA_AC_PROBE_PORT_CFG(idx)); + else + return spx5_rmw(0, val, sparx5, ANA_AC_PROBE_PORT_CFG1(idx)); +} + +/* Check if mirror contains port */ +static bool sparx5_mirror_contains(struct sparx5 *sparx5, u32 idx, u32 portno) +{ + return (sparx5_mirror_port_get(sparx5, idx) & BIT_ULL(portno)) != 0; +} + +/* Check if mirror is empty */ +static bool sparx5_mirror_is_empty(struct sparx5 *sparx5, u32 idx) +{ + return sparx5_mirror_port_get(sparx5, idx) == 0; +} + +/* Get direction of mirror */ +static u32 sparx5_mirror_dir_get(struct sparx5 *sparx5, u32 idx) +{ + u32 val = spx5_rd(sparx5, ANA_AC_PROBE_CFG(idx)); + + return ANA_AC_PROBE_CFG_PROBE_DIRECTION_GET(val); +} + +/* Set direction of mirror */ +static void sparx5_mirror_dir_set(struct sparx5 *sparx5, u32 idx, u32 dir) +{ + spx5_rmw(ANA_AC_PROBE_CFG_PROBE_DIRECTION_SET(dir), + ANA_AC_PROBE_CFG_PROBE_DIRECTION, sparx5, + ANA_AC_PROBE_CFG(idx)); +} + +/* Set the monitor port for this mirror */ +static void sparx5_mirror_monitor_set(struct sparx5 *sparx5, u32 idx, + u32 portno) +{ + spx5_rmw(QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL_SET(portno), + QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL, sparx5, + QFWD_FRAME_COPY_CFG(idx + SPX5_QFWD_MP_OFFSET)); +} + +/* Get the monitor port of this mirror */ +static u32 sparx5_mirror_monitor_get(struct sparx5 *sparx5, u32 idx) +{ + u32 val = spx5_rd(sparx5, + QFWD_FRAME_COPY_CFG(idx + SPX5_QFWD_MP_OFFSET)); + + return QFWD_FRAME_COPY_CFG_FRMC_PORT_VAL_GET(val); +} + +/* Check if port is the monitor port of this mirror */ +static bool sparx5_mirror_has_monitor(struct sparx5 *sparx5, u32 idx, + u32 portno) +{ + return sparx5_mirror_monitor_get(sparx5, idx) == portno; +} + +/* Get a suitable mirror for this port */ +static int sparx5_mirror_get(struct sparx5_port *sport, + struct sparx5_port *mport, u32 dir, u32 *idx) +{ + struct sparx5 *sparx5 = sport->sparx5; + u32 i; + + /* Check if this port is already used as a monitor port */ + for (i = 0; i < SPX5_MIRROR_PROBE_MAX; i++) + if (sparx5_mirror_has_monitor(sparx5, i, sport->portno)) + return -EINVAL; + + /* Check if existing mirror can be reused + * (same direction and monitor port). 
+ */ + for (i = 0; i < SPX5_MIRROR_PROBE_MAX; i++) { + if (sparx5_mirror_dir_get(sparx5, i) == dir && + sparx5_mirror_has_monitor(sparx5, i, mport->portno)) { + *idx = i; + return 0; + } + } + + /* Return free mirror */ + for (i = 0; i < SPX5_MIRROR_PROBE_MAX; i++) { + if (sparx5_mirror_is_empty(sparx5, i)) { + *idx = i; + return 0; + } + } + + return -ENOENT; +} + +int sparx5_mirror_add(struct sparx5_mall_entry *entry) +{ + u32 mirror_idx, dir = sparx5_mirror_to_dir(entry->ingress); + struct sparx5_port *sport, *mport; + struct sparx5 *sparx5; + int err; + + /* Source port */ + sport = entry->port; + /* monitor port */ + mport = entry->mirror.port; + sparx5 = sport->sparx5; + + if (sport->portno == mport->portno) + return -EINVAL; + + err = sparx5_mirror_get(sport, mport, dir, &mirror_idx); + if (err) + return err; + + if (sparx5_mirror_contains(sparx5, mirror_idx, sport->portno)) + return -EEXIST; + + /* Add port to mirror */ + sparx5_mirror_port_add(sparx5, mirror_idx, sport->portno); + + /* Set direction of mirror */ + sparx5_mirror_dir_set(sparx5, mirror_idx, dir); + + /* Set monitor port for mirror */ + sparx5_mirror_monitor_set(sparx5, mirror_idx, mport->portno); + + entry->mirror.idx = mirror_idx; + + return 0; +} + +void sparx5_mirror_del(struct sparx5_mall_entry *entry) +{ + struct sparx5_port *port = entry->port; + struct sparx5 *sparx5 = port->sparx5; + u32 mirror_idx = entry->mirror.idx; + + sparx5_mirror_port_del(sparx5, mirror_idx, port->portno); + if (!sparx5_mirror_is_empty(sparx5, mirror_idx)) + return; + + sparx5_mirror_dir_set(sparx5, mirror_idx, SPX5_MIRROR_DISABLED); + + sparx5_mirror_monitor_set(sparx5, + mirror_idx, + SPX5_MIRROR_MONITOR_PORT_DEFAULT); +} -- cgit v1.2.3 From 2ac99ed9c3ef43abcdc20210c67f4d1d901fe1a1 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Sat, 20 Apr 2024 21:29:13 +0200 Subject: net: sparx5: add the tc glue to support port mirroring Add the necessary tc glue to add and delete mirror rules through tc matchall. Signed-off-by: Daniel Machon Reviewed-by: Steen Hegelund Signed-off-by: David S. 
Miller --- .../ethernet/microchip/sparx5/sparx5_tc_matchall.c | 38 ++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c index 2a33b347098f..4f9ae026fa47 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c @@ -36,6 +36,13 @@ static void sparx5_tc_matchall_parse_action(struct sparx5_port *port, entry->cookie = cookie; } +static void +sparx5_tc_matchall_parse_mirror_action(struct sparx5_mall_entry *entry, + struct flow_action_entry *action) +{ + entry->mirror.port = netdev_priv(action->dev); +} + static int sparx5_tc_matchall_replace(struct net_device *ndev, struct tc_cls_matchall_offload *tmo, bool ingress) @@ -65,6 +72,31 @@ static int sparx5_tc_matchall_replace(struct net_device *ndev, sparx5 = port->sparx5; switch (action->id) { + case FLOW_ACTION_MIRRED: + sparx5_tc_matchall_parse_mirror_action(mall_entry, action); + err = sparx5_mirror_add(mall_entry); + if (err) { + switch (err) { + case -EEXIST: + NL_SET_ERR_MSG_MOD(tmo->common.extack, + "Mirroring already exists"); + break; + case -EINVAL: + NL_SET_ERR_MSG_MOD(tmo->common.extack, + "Cannot mirror a monitor port"); + break; + case -ENOENT: + NL_SET_ERR_MSG_MOD(tmo->common.extack, + "No more mirror probes available"); + break; + default: + NL_SET_ERR_MSG_MOD(tmo->common.extack, + "Unknown error"); + break; + } + return err; + } + break; case FLOW_ACTION_GOTO: err = vcap_enable_lookups(sparx5->vcap_ctrl, ndev, tmo->common.chain_index, @@ -108,14 +140,16 @@ static int sparx5_tc_matchall_destroy(struct net_device *ndev, struct sparx5_port *port = netdev_priv(ndev); struct sparx5 *sparx5 = port->sparx5; struct sparx5_mall_entry *entry; - int err; + int err = 0; entry = sparx5_tc_matchall_entry_find(&sparx5->mall_entries, tmo->cookie); if (!entry) return -ENOENT; - if (entry->type == FLOW_ACTION_GOTO) { + if (entry->type == FLOW_ACTION_MIRRED) { + sparx5_mirror_del(entry); + } else if (entry->type == FLOW_ACTION_GOTO) { err = vcap_enable_lookups(sparx5->vcap_ctrl, ndev, 0, 0, tmo->cookie, false); } else { -- cgit v1.2.3 From 5af946f4bb421cac8b6936b59ffdd426279c0b2e Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Sat, 20 Apr 2024 21:29:14 +0200 Subject: net: sparx5: add support for matchall mirror stats Add support for tc matchall mirror stats. When a new matchall mirror rule is added, the baseline stats for that port is saved. Signed-off-by: Daniel Machon Reviewed-by: Steen Hegelund Signed-off-by: David S. 
Miller --- .../net/ethernet/microchip/sparx5/sparx5_main.h | 3 ++ .../net/ethernet/microchip/sparx5/sparx5_mirror.c | 37 ++++++++++++++++++++++ .../ethernet/microchip/sparx5/sparx5_tc_matchall.c | 27 ++++++++++++++++ 3 files changed, 67 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 5d026e1670f5..1982ae03b4fe 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -174,6 +174,7 @@ struct sparx5_port { struct phylink_config phylink_config; struct phylink *phylink; struct phylink_pcs phylink_pcs; + struct flow_stats mirror_stats; u16 portno; /* Ingress default VLAN (pvid) */ u16 pvid; @@ -562,6 +563,8 @@ void sparx5_new_base_time(struct sparx5 *sparx5, const u32 cycle_time, /* sparx5_mirror.c */ int sparx5_mirror_add(struct sparx5_mall_entry *entry); void sparx5_mirror_del(struct sparx5_mall_entry *entry); +void sparx5_mirror_stats(struct sparx5_mall_entry *entry, + struct flow_stats *fstats); /* Clock period in picoseconds */ static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c index f029634a6a42..15db423be4aa 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mirror.c @@ -196,3 +196,40 @@ void sparx5_mirror_del(struct sparx5_mall_entry *entry) mirror_idx, SPX5_MIRROR_MONITOR_PORT_DEFAULT); } + +void sparx5_mirror_stats(struct sparx5_mall_entry *entry, + struct flow_stats *fstats) +{ + struct sparx5_port *port = entry->port; + struct rtnl_link_stats64 new_stats; + struct flow_stats *old_stats; + + old_stats = &entry->port->mirror_stats; + sparx5_get_stats64(port->ndev, &new_stats); + + if (entry->ingress) { + flow_stats_update(fstats, + new_stats.rx_bytes - old_stats->bytes, + new_stats.rx_packets - old_stats->pkts, + new_stats.rx_dropped - old_stats->drops, + old_stats->lastused, + FLOW_ACTION_HW_STATS_IMMEDIATE); + + old_stats->bytes = new_stats.rx_bytes; + old_stats->pkts = new_stats.rx_packets; + old_stats->drops = new_stats.rx_dropped; + old_stats->lastused = jiffies; + } else { + flow_stats_update(fstats, + new_stats.tx_bytes - old_stats->bytes, + new_stats.tx_packets - old_stats->pkts, + new_stats.tx_dropped - old_stats->drops, + old_stats->lastused, + FLOW_ACTION_HW_STATS_IMMEDIATE); + + old_stats->bytes = new_stats.tx_bytes; + old_stats->pkts = new_stats.tx_packets; + old_stats->drops = new_stats.tx_dropped; + old_stats->lastused = jiffies; + } +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c index 4f9ae026fa47..6b4d1d7b9730 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_matchall.c @@ -96,6 +96,8 @@ static int sparx5_tc_matchall_replace(struct net_device *ndev, } return err; } + /* Get baseline stats for this port */ + sparx5_mirror_stats(mall_entry, &tmo->stats); break; case FLOW_ACTION_GOTO: err = vcap_enable_lookups(sparx5->vcap_ctrl, ndev, @@ -162,6 +164,29 @@ static int sparx5_tc_matchall_destroy(struct net_device *ndev, return err; } +static int sparx5_tc_matchall_stats(struct net_device *ndev, + struct tc_cls_matchall_offload *tmo, + bool ingress) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5 
*sparx5 = port->sparx5; + struct sparx5_mall_entry *entry; + + entry = sparx5_tc_matchall_entry_find(&sparx5->mall_entries, + tmo->cookie); + if (!entry) + return -ENOENT; + + if (entry->type == FLOW_ACTION_MIRRED) { + sparx5_mirror_stats(entry, &tmo->stats); + } else { + NL_SET_ERR_MSG_MOD(tmo->common.extack, "Unsupported action"); + return -EOPNOTSUPP; + } + + return 0; +} + int sparx5_tc_matchall(struct net_device *ndev, struct tc_cls_matchall_offload *tmo, bool ingress) @@ -171,6 +196,8 @@ int sparx5_tc_matchall(struct net_device *ndev, return sparx5_tc_matchall_replace(ndev, tmo, ingress); case TC_CLSMATCHALL_DESTROY: return sparx5_tc_matchall_destroy(ndev, tmo, ingress); + case TC_CLSMATCHALL_STATS: + return sparx5_tc_matchall_stats(ndev, tmo, ingress); default: return -EOPNOTSUPP; } -- cgit v1.2.3 From 306ec721d043bbe5e818d59fbb37c28d999b5d8b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:07 +0200 Subject: net: intel: introduce {, Intel} Ethernet common library It's no secret that there's a ton of code duplication between two or more Intel Ethernet modules. Before introducing new changes, which would need to be copied over again, start decoupling the already existing duplicate functionality into a new module, which will be shared between several Intel Ethernet drivers. Add the lookup table which converts the 8/10-bit hardware packet type into a parsed bitfield structure for easy checking of packet format parameters, such as payload level, IP version, etc. This is currently used by i40e, ice and iavf and it's all the same in all three drivers. The only difference introduced in this implementation is that, instead of defining a 256-element (or 1024 in the case of ice) array, an unlikely() condition limits the input to 154 (the current maximum non-reserved packet type). There's no reason to waste 600 (or even 3600) bytes just to avoid penalizing very unlikely exception packets. The hash computation function now takes the payload level directly as a pkt_hash_type. There are a couple of cases where non-IP ptypes are marked as L3 payload, and in the previous versions their hash level would be 2, not 3. But skb_set_hash() only distinguishes between L4 and non-L4, so this won't change anything at all. The module is behind a hidden Kconfig symbol, which the drivers will select when needed. The exports are behind the 'LIBIE' namespace to limit the scope of the functions. Note that non-HW-specific symbols will live in yet another module, libeth. This is done to easily distinguish pretty generic code, ready for reuse by any other vendor and/or for moving up the layers, from the code useful only in Intel's 1-100G drivers.
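
As a rough consumer-side sketch, condensed from the i40e_rx_checksum() hunk further below rather than a standalone buildable example, a driver now parses the hardware ptype once and queries the decoded fields through the new helpers:

| struct libeth_rx_pt decoded = libie_rx_pt_parse(ptype);
|
| /* Rx checksum offload enabled and a recognized ptype? */
| if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
|	return;
|
| ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
| ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;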
Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/Kconfig | 7 + drivers/net/ethernet/intel/Makefile | 3 + drivers/net/ethernet/intel/i40e/i40e_common.c | 253 ------------------ drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_prototype.h | 7 - drivers/net/ethernet/intel/i40e/i40e_txrx.c | 72 ++--- drivers/net/ethernet/intel/i40e/i40e_type.h | 88 ------- drivers/net/ethernet/intel/iavf/iavf_common.c | 253 ------------------ drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + drivers/net/ethernet/intel/iavf/iavf_prototype.h | 7 - drivers/net/ethernet/intel/iavf/iavf_txrx.c | 70 ++--- drivers/net/ethernet/intel/iavf/iavf_type.h | 88 ------- drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 320 ----------------------- drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 111 ++------ drivers/net/ethernet/intel/libeth/Kconfig | 8 + drivers/net/ethernet/intel/libeth/Makefile | 6 + drivers/net/ethernet/intel/libeth/rx.c | 52 ++++ drivers/net/ethernet/intel/libie/Kconfig | 10 + drivers/net/ethernet/intel/libie/Makefile | 6 + drivers/net/ethernet/intel/libie/rx.c | 124 +++++++++ include/linux/net/intel/libie/rx.h | 33 +++ include/net/libeth/rx.h | 125 +++++++++ 23 files changed, 426 insertions(+), 1220 deletions(-) create mode 100644 drivers/net/ethernet/intel/libeth/Kconfig create mode 100644 drivers/net/ethernet/intel/libeth/Makefile create mode 100644 drivers/net/ethernet/intel/libeth/rx.c create mode 100644 drivers/net/ethernet/intel/libie/Kconfig create mode 100644 drivers/net/ethernet/intel/libie/Makefile create mode 100644 drivers/net/ethernet/intel/libie/rx.c create mode 100644 include/linux/net/intel/libie/rx.h create mode 100644 include/net/libeth/rx.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 6e7901e12699..e0287fbd501d 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -16,6 +16,9 @@ config NET_VENDOR_INTEL if NET_VENDOR_INTEL +source "drivers/net/ethernet/intel/libeth/Kconfig" +source "drivers/net/ethernet/intel/libie/Kconfig" + config E100 tristate "Intel(R) PRO/100+ support" depends on PCI @@ -225,6 +228,7 @@ config I40E depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS + select LIBIE select NET_DEVLINK help This driver supports Intel(R) Ethernet Controller XL710 Family of @@ -253,6 +257,8 @@ config I40E_DCB # so that CONFIG_IAVF symbol will always mirror the state of CONFIG_I40EVF config IAVF tristate + select LIBIE + config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" select IAVF @@ -283,6 +289,7 @@ config ICE depends on GNSS || GNSS = n select AUXILIARY_BUS select DIMLIB + select LIBIE select NET_DEVLINK select PLDMFW select DPLL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index dacb481ee5b1..04c844ef4964 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -3,6 +3,9 @@ # Makefile for the Intel network device drivers. 
# +obj-$(CONFIG_LIBETH) += libeth/ +obj-$(CONFIG_LIBIE) += libie/ + obj-$(CONFIG_E100) += e100.o obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_E1000E) += e1000e/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index de6ca6295742..e8031f1a9b4f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -381,259 +381,6 @@ int i40e_aq_set_rss_key(struct i40e_hw *hw, return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT i40e_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum i40e_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - I40E_RX_PTYPE_##OUTER_FRAG, \ - I40E_RX_PTYPE_TUNNEL_##T, \ - I40E_RX_PTYPE_TUNNEL_END_##TE, \ - I40E_RX_PTYPE_##TEF, \ - I40E_RX_PTYPE_INNER_PROT_##I, \ - I40E_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG -#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG -#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */ -struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - I40E_PTT_UNUSED_ENTRY(0), - I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(4), - I40E_PTT_UNUSED_ENTRY(5), - I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(8), - I40E_PTT_UNUSED_ENTRY(9), - I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, 
PAY3), - I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(25), - I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(32), - I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(39), - I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(47), - I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(54), - I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(62), - I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(69), - I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, 
UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(77), - I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(84), - I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(91), - I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(98), - I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(105), - I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(113), - I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(120), - I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(128), - I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, 
NOF, SCTP, PAY4), - I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(135), - I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(143), - I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(150), - I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * i40e_init_shared_code - Initialize the shared code * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0bdcdea0be3e..164afd8ce5a0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -100,6 +100,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index ce1f11b8ad65..5a0699ca7ce5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -371,13 +371,6 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); int i40e_set_mac_type(struct i40e_hw *hw); -extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; - -static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return i40e_ptype_lookup[ptype]; -} - /** * i40e_virtchnl_link_speed - Convert AdminQ link_speed to virtchnl definition * @link_speed: the speed to convert diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 64d198ed166b..bc9e766d88cb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ #include +#include #include #include #include @@ -1741,38 +1742,30 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, union i40e_rx_desc *rx_desc) { - struct i40e_rx_ptype_decoded decoded; + struct libeth_rx_pt decoded; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); - rx_error = FIELD_GET(I40E_RXD_QW1_ERROR_MASK, qword); - rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); - decoded = decode_rx_desc_ptype(ptype); - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); - /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + decoded = libie_rx_pt_parse(ptype); + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded)) return; + rx_error = FIELD_GET(I40E_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); + /* did the hardware decode the packet and checksum? */ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); + ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4; + ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1800,49 +1793,16 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. 
*/ - if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT) + if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case I40E_RX_PTYPE_INNER_PROT_TCP: - case I40E_RX_PTYPE_INNER_PROT_UDP: - case I40E_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * i40e_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int i40e_ptype_to_htype(u8 ptype) -{ - struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1855,17 +1815,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libeth_rx_pt decoded; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + decoded = libie_rx_pt_parse(rx_ptype); + if (!libeth_rx_pt_has_hash(ring->netdev, decoded)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype)); + libeth_rx_pt_set_hash(skb, hash, decoded); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index d9031499697e..28568e126850 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -745,94 +745,6 @@ enum i40e_rx_desc_error_l3l4e_fcoe_masks { #define I40E_RXD_QW1_PTYPE_SHIFT 30 #define I40E_RXD_QW1_PTYPE_MASK (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum i40e_rx_l2_ptype { - I40E_RX_PTYPE_L2_RESERVED = 0, - I40E_RX_PTYPE_L2_MAC_PAY2 = 1, - I40E_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - I40E_RX_PTYPE_L2_FIP_PAY2 = 3, - I40E_RX_PTYPE_L2_OUI_PAY2 = 4, - I40E_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - I40E_RX_PTYPE_L2_LLDP_PAY2 = 6, - I40E_RX_PTYPE_L2_ECP_PAY2 = 7, - I40E_RX_PTYPE_L2_EVB_PAY2 = 8, - I40E_RX_PTYPE_L2_QCN_PAY2 = 9, - I40E_RX_PTYPE_L2_EAPOL_PAY2 = 10, - I40E_RX_PTYPE_L2_ARP = 11, - I40E_RX_PTYPE_L2_FCOE_PAY3 = 12, - I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - I40E_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - I40E_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - I40E_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct i40e_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 
tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum i40e_rx_ptype_outer_ip { - I40E_RX_PTYPE_OUTER_L2 = 0, - I40E_RX_PTYPE_OUTER_IP = 1 -}; - -enum i40e_rx_ptype_outer_ip_ver { - I40E_RX_PTYPE_OUTER_NONE = 0, - I40E_RX_PTYPE_OUTER_IPV4 = 0, - I40E_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum i40e_rx_ptype_outer_fragmented { - I40E_RX_PTYPE_NOT_FRAG = 0, - I40E_RX_PTYPE_FRAG = 1 -}; - -enum i40e_rx_ptype_tunnel_type { - I40E_RX_PTYPE_TUNNEL_NONE = 0, - I40E_RX_PTYPE_TUNNEL_IP_IP = 1, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum i40e_rx_ptype_tunnel_end_prot { - I40E_RX_PTYPE_TUNNEL_END_NONE = 0, - I40E_RX_PTYPE_TUNNEL_END_IPV4 = 1, - I40E_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum i40e_rx_ptype_inner_prot { - I40E_RX_PTYPE_INNER_PROT_NONE = 0, - I40E_RX_PTYPE_INNER_PROT_UDP = 1, - I40E_RX_PTYPE_INNER_PROT_TCP = 2, - I40E_RX_PTYPE_INNER_PROT_SCTP = 3, - I40E_RX_PTYPE_INNER_PROT_ICMP = 4, - I40E_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum i40e_rx_ptype_payload_layer { - I40E_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define I40E_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ I40E_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 5a25233a89d5..aa751ce3425b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -432,259 +432,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The iavf_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. 
- * - * Typical work flow: - * - * IF NOT iavf_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum iavf_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - IAVF_RX_PTYPE_##OUTER_FRAG, \ - IAVF_RX_PTYPE_TUNNEL_##T, \ - IAVF_RX_PTYPE_TUNNEL_END_##TE, \ - IAVF_RX_PTYPE_##TEF, \ - IAVF_RX_PTYPE_INNER_PROT_##I, \ - IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define IAVF_RX_PTYPE_NOF IAVF_RX_PTYPE_NOT_FRAG -#define IAVF_RX_PTYPE_FRG IAVF_RX_PTYPE_FRAG -#define IAVF_RX_PTYPE_INNER_PROT_TS IAVF_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */ -struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - IAVF_PTT_UNUSED_ENTRY(0), - IAVF_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - IAVF_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(4), - IAVF_PTT_UNUSED_ENTRY(5), - IAVF_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(8), - IAVF_PTT_UNUSED_ENTRY(9), - IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(25), - IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(32), - IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(39), - IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(41, 
IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(47), - IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(54), - IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(62), - IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(69), - IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(77), - IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(84), - IAVF_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - 
IAVF_PTT_UNUSED_ENTRY(91), - IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - IAVF_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(98), - IAVF_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(105), - IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(113), - IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(120), - IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(128), - IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(135), - IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - 
IAVF_PTT_UNUSED_ENTRY(143), - IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(150), - IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * iavf_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 13361a780ece..d6cbe5022815 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -45,6 +45,7 @@ MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); MODULE_ALIAS("i40evf"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index 4a48e6171405..48c3901381b4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -45,13 +45,6 @@ enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, struct iavf_aqc_get_set_rss_key_data *key); -extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; - -static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return iavf_ptype_lookup[ptype]; -} - void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 32bb604a1382..33ec01d0ed67 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include +#include #include #include "iavf.h" @@ -982,38 +983,30 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi, struct sk_buff *skb, union iavf_rx_desc *rx_desc) { - struct iavf_rx_ptype_decoded decoded; + struct libeth_rx_pt decoded; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); - rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); - decoded = decode_rx_desc_ptype(ptype); - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - /* Rx csum enabled and ip headers found? 
*/ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + decoded = libie_rx_pt_parse(ptype); + if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded)) return; + rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); + /* did the hardware decode the packet and checksum? */ if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6); + ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4; + ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) | @@ -1037,46 +1030,13 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi, if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case IAVF_RX_PTYPE_INNER_PROT_TCP: - case IAVF_RX_PTYPE_INNER_PROT_UDP: - case IAVF_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * iavf_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static int iavf_ptype_to_htype(u8 ptype) -{ - struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * iavf_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1089,17 +1049,19 @@ static void iavf_rx_hash(struct iavf_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libeth_rx_pt decoded; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + decoded = libie_rx_pt_parse(rx_ptype); + if (!libeth_rx_pt_has_hash(ring->netdev, decoded)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype)); + libeth_rx_pt_set_hash(skb, hash, decoded); } } diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index 2b6a207fa441..23ded4fcd94f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -327,94 +327,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks { #define IAVF_RXD_QW1_PTYPE_SHIFT 30 #define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum iavf_rx_l2_ptype { - IAVF_RX_PTYPE_L2_RESERVED = 0, - IAVF_RX_PTYPE_L2_MAC_PAY2 = 1, - IAVF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - IAVF_RX_PTYPE_L2_FIP_PAY2 = 3, - IAVF_RX_PTYPE_L2_OUI_PAY2 = 4, - IAVF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - IAVF_RX_PTYPE_L2_LLDP_PAY2 = 6, - 
IAVF_RX_PTYPE_L2_ECP_PAY2 = 7, - IAVF_RX_PTYPE_L2_EVB_PAY2 = 8, - IAVF_RX_PTYPE_L2_QCN_PAY2 = 9, - IAVF_RX_PTYPE_L2_EAPOL_PAY2 = 10, - IAVF_RX_PTYPE_L2_ARP = 11, - IAVF_RX_PTYPE_L2_FCOE_PAY3 = 12, - IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - IAVF_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - IAVF_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct iavf_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum iavf_rx_ptype_outer_ip { - IAVF_RX_PTYPE_OUTER_L2 = 0, - IAVF_RX_PTYPE_OUTER_IP = 1 -}; - -enum iavf_rx_ptype_outer_ip_ver { - IAVF_RX_PTYPE_OUTER_NONE = 0, - IAVF_RX_PTYPE_OUTER_IPV4 = 0, - IAVF_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum iavf_rx_ptype_outer_fragmented { - IAVF_RX_PTYPE_NOT_FRAG = 0, - IAVF_RX_PTYPE_FRAG = 1 -}; - -enum iavf_rx_ptype_tunnel_type { - IAVF_RX_PTYPE_TUNNEL_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_IP_IP = 1, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum iavf_rx_ptype_tunnel_end_prot { - IAVF_RX_PTYPE_TUNNEL_END_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_END_IPV4 = 1, - IAVF_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum iavf_rx_ptype_inner_prot { - IAVF_RX_PTYPE_INNER_PROT_NONE = 0, - IAVF_RX_PTYPE_INNER_PROT_UDP = 1, - IAVF_RX_PTYPE_INNER_PROT_TCP = 2, - IAVF_RX_PTYPE_INNER_PROT_SCTP = 3, - IAVF_RX_PTYPE_INNER_PROT_ICMP = 4, - IAVF_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum iavf_rx_ptype_payload_layer { - IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d384ddfcb83e..611577ebc29d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -160,64 +160,6 @@ struct ice_fltr_desc { (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S) #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL -struct ice_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:2; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum ice_rx_ptype_outer_ip { - ICE_RX_PTYPE_OUTER_L2 = 0, - ICE_RX_PTYPE_OUTER_IP = 1, -}; - -enum ice_rx_ptype_outer_ip_ver { - ICE_RX_PTYPE_OUTER_NONE = 0, - ICE_RX_PTYPE_OUTER_IPV4 = 1, - ICE_RX_PTYPE_OUTER_IPV6 = 2, -}; - -enum ice_rx_ptype_outer_fragmented { - ICE_RX_PTYPE_NOT_FRAG = 0, - ICE_RX_PTYPE_FRAG = 1, -}; - -enum ice_rx_ptype_tunnel_type { - ICE_RX_PTYPE_TUNNEL_NONE = 0, - ICE_RX_PTYPE_TUNNEL_IP_IP = 1, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum ice_rx_ptype_tunnel_end_prot { - ICE_RX_PTYPE_TUNNEL_END_NONE = 0, - 
ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, - ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum ice_rx_ptype_inner_prot { - ICE_RX_PTYPE_INNER_PROT_NONE = 0, - ICE_RX_PTYPE_INNER_PROT_UDP = 1, - ICE_RX_PTYPE_INNER_PROT_TCP = 2, - ICE_RX_PTYPE_INNER_PROT_SCTP = 3, - ICE_RX_PTYPE_INNER_PROT_ICMP = 4, - ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, -}; - -enum ice_rx_ptype_payload_layer { - ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - /* Rx Flex Descriptor * This descriptor is used instead of the legacy version descriptor when * ice_rlan_ctx.adv_desc is set @@ -651,266 +593,4 @@ struct ice_tlan_ctx { u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; -/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT ice_ptype_lkup[ptype].known - * THEN - * Packet is unknown - * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum ice_rx_l2_ptype to decode the packet type - * ENDIF - */ -#define ICE_PTYPES \ - /* L2 Packet types */ \ - ICE_PTT_UNUSED_ENTRY(0), \ - ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), \ - ICE_PTT_UNUSED_ENTRY(2), \ - ICE_PTT_UNUSED_ENTRY(3), \ - ICE_PTT_UNUSED_ENTRY(4), \ - ICE_PTT_UNUSED_ENTRY(5), \ - ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT_UNUSED_ENTRY(8), \ - ICE_PTT_UNUSED_ENTRY(9), \ - ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT_UNUSED_ENTRY(12), \ - ICE_PTT_UNUSED_ENTRY(13), \ - ICE_PTT_UNUSED_ENTRY(14), \ - ICE_PTT_UNUSED_ENTRY(15), \ - ICE_PTT_UNUSED_ENTRY(16), \ - ICE_PTT_UNUSED_ENTRY(17), \ - ICE_PTT_UNUSED_ENTRY(18), \ - ICE_PTT_UNUSED_ENTRY(19), \ - ICE_PTT_UNUSED_ENTRY(20), \ - ICE_PTT_UNUSED_ENTRY(21), \ - \ - /* Non Tunneled IPv4 */ \ - ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(25), \ - ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), \ - ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), \ - ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> IPv4 */ \ - ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(32), \ - ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> IPv6 */ \ - ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(39), \ - ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, 
TCP, PAY4), \ - ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT */ \ - ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 --> GRE/NAT --> IPv4 */ \ - ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(47), \ - ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> IPv6 */ \ - ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(54), \ - ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> MAC */ \ - ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ \ - ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(62), \ - ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ \ - ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(69), \ - ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> MAC/VLAN */ \ - ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ \ - ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(77), \ - ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ \ - ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(84), \ - ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), \ - \ - /* Non Tunneled IPv6 */ \ - ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(89, IP, 
IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(91), \ - ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), \ - ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), \ - ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> IPv4 */ \ - ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(98), \ - ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> IPv6 */ \ - ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(105), \ - ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT */ \ - ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> IPv4 */ \ - ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(113), \ - ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> IPv6 */ \ - ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(120), \ - ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC */ \ - ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ \ - ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(128), \ - ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ \ - ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(135), \ - ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN */ \ - ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ \ - ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, 
IPV4, FRG, NONE, PAY3), \ - ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(143), \ - ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ \ - ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(150), \ - ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - -#define ICE_NUM_DEFINED_PTYPES 154 - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - ICE_RX_PTYPE_##OUTER_FRAG, \ - ICE_RX_PTYPE_TUNNEL_##T, \ - ICE_RX_PTYPE_TUNNEL_END_##TE, \ - ICE_RX_PTYPE_##TEF, \ - ICE_RX_PTYPE_INNER_PROT_##I, \ - ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG -#define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG - -/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ -static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { - ICE_PTYPES - - /* unused entries */ - [ICE_NUM_DEFINED_PTYPES ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) -{ - return ice_ptype_lkup[ptype]; -} - - #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2b173638b877..f55b57daec83 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -37,6 +37,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index df072ce767b1..2719f0e20933 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -38,30 +39,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) } } -/** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by - * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of - * Rx desc. 
- */ -static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) -{ - struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) - return PKT_HASH_TYPE_L2; - - return PKT_HASH_TYPE_NONE; -} - /** * ice_get_rx_hash - get RX hash value from descriptor * @rx_desc: specific descriptor @@ -91,14 +68,16 @@ ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, const union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { + struct libeth_rx_pt decoded; u32 hash; - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + decoded = libie_rx_pt_parse(rx_ptype); + if (!libeth_rx_pt_has_hash(rx_ring->netdev, decoded)) return; hash = ice_get_rx_hash(rx_desc); if (likely(hash)) - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); + libeth_rx_pt_set_hash(skb, hash, decoded); } /** @@ -114,34 +93,26 @@ static void ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { - struct ice_rx_ptype_decoded decoded; + struct libeth_rx_pt decoded; u16 rx_status0, rx_status1; bool ipv4, ipv6; - rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); - rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - - decoded = ice_decode_rx_desc_ptype(ptype); - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) + decoded = libie_rx_pt_parse(ptype); + if (!libeth_rx_pt_has_checksum(ring->netdev, decoded)) return; + rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); + /* check if HW has decoded the packet and checksum */ if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4; + ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6; if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) { ring->vsi->back->hw_rx_eipe_error++; @@ -169,19 +140,10 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) + if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - default: - break; - } + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: @@ -536,42 +498,6 @@ static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) return 0; } -/* Define a ptype index -> XDP hash type lookup table. - * It uses the same ptype definitions as ice_decode_rx_desc_ptype[], - * avoiding possible copy-paste errors. 
- */ -#undef ICE_PTT -#undef ICE_PTT_UNUSED_ENTRY - -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = XDP_RSS_L3_##OUTER_IP_VER | XDP_RSS_L4_##I | XDP_RSS_TYPE_##PL - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = 0 - -/* A few supplementary definitions for when XDP hash types do not coincide - * with what can be generated from ptype definitions - * by means of preprocessor concatenation. - */ -#define XDP_RSS_L3_NONE XDP_RSS_TYPE_NONE -#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE -#define XDP_RSS_TYPE_PAY2 XDP_RSS_TYPE_L2 -#define XDP_RSS_TYPE_PAY3 XDP_RSS_TYPE_NONE -#define XDP_RSS_TYPE_PAY4 XDP_RSS_L4 - -static const enum xdp_rss_hash_type -ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = { - ICE_PTYPES -}; - -#undef XDP_RSS_L3_NONE -#undef XDP_RSS_L4_NONE -#undef XDP_RSS_TYPE_PAY2 -#undef XDP_RSS_TYPE_PAY3 -#undef XDP_RSS_TYPE_PAY4 - -#undef ICE_PTT -#undef ICE_PTT_UNUSED_ENTRY - /** * ice_xdp_rx_hash_type - Get XDP-specific hash type from the RX descriptor * @eop_desc: End of Packet descriptor @@ -579,12 +505,7 @@ ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = { static enum xdp_rss_hash_type ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) { - u16 ptype = ice_get_ptype(eop_desc); - - if (unlikely(ptype >= ICE_NUM_DEFINED_PTYPES)) - return 0; - - return ice_ptype_to_xdp_hash[ptype]; + return libie_rx_pt_parse(ice_get_ptype(eop_desc)).hash_type; } /** diff --git a/drivers/net/ethernet/intel/libeth/Kconfig b/drivers/net/ethernet/intel/libeth/Kconfig new file mode 100644 index 000000000000..af970a63c227 --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/Kconfig @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 Intel Corporation + +config LIBETH + tristate + help + libeth is a common library containing routines shared between several + drivers, but not yet promoted to the generic kernel API. diff --git a/drivers/net/ethernet/intel/libeth/Makefile b/drivers/net/ethernet/intel/libeth/Makefile new file mode 100644 index 000000000000..cb99203d1dd2 --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 Intel Corporation + +obj-$(CONFIG_LIBETH) += libeth.o + +libeth-objs += rx.o diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c new file mode 100644 index 000000000000..879c4dfd6a4e --- /dev/null +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation */ + +#include + +/* Converting abstract packet type numbers into a software structure with + * the packet parameters to do O(1) lookup on Rx. 
+ */ + +static const u16 libeth_rx_pt_xdp_oip[] = { + [LIBETH_RX_PT_OUTER_L2] = XDP_RSS_TYPE_NONE, + [LIBETH_RX_PT_OUTER_IPV4] = XDP_RSS_L3_IPV4, + [LIBETH_RX_PT_OUTER_IPV6] = XDP_RSS_L3_IPV6, +}; + +static const u16 libeth_rx_pt_xdp_iprot[] = { + [LIBETH_RX_PT_INNER_NONE] = XDP_RSS_TYPE_NONE, + [LIBETH_RX_PT_INNER_UDP] = XDP_RSS_L4_UDP, + [LIBETH_RX_PT_INNER_TCP] = XDP_RSS_L4_TCP, + [LIBETH_RX_PT_INNER_SCTP] = XDP_RSS_L4_SCTP, + [LIBETH_RX_PT_INNER_ICMP] = XDP_RSS_L4_ICMP, + [LIBETH_RX_PT_INNER_TIMESYNC] = XDP_RSS_TYPE_NONE, +}; + +static const u16 libeth_rx_pt_xdp_pl[] = { + [LIBETH_RX_PT_PAYLOAD_NONE] = XDP_RSS_TYPE_NONE, + [LIBETH_RX_PT_PAYLOAD_L2] = XDP_RSS_TYPE_NONE, + [LIBETH_RX_PT_PAYLOAD_L3] = XDP_RSS_TYPE_NONE, + [LIBETH_RX_PT_PAYLOAD_L4] = XDP_RSS_L4, +}; + +/** + * libeth_rx_pt_gen_hash_type - generate an XDP RSS hash type for a PT + * @pt: PT structure to evaluate + * + * Generates ```hash_type``` field with XDP RSS type values from the parsed + * packet parameters if they're obtained dynamically at runtime. + */ +void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt) +{ + pt->hash_type = 0; + pt->hash_type |= libeth_rx_pt_xdp_oip[pt->outer_ip]; + pt->hash_type |= libeth_rx_pt_xdp_iprot[pt->inner_prot]; + pt->hash_type |= libeth_rx_pt_xdp_pl[pt->payload_layer]; +} +EXPORT_SYMBOL_NS_GPL(libeth_rx_pt_gen_hash_type, LIBETH); + +/* Module */ + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Common Ethernet library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libie/Kconfig b/drivers/net/ethernet/intel/libie/Kconfig new file mode 100644 index 000000000000..33aff6bc8f81 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 Intel Corporation + +config LIBIE + tristate + select LIBETH + help + libie (Intel Ethernet library) is a common library built on top of + libeth and containing vendor-specific routines shared between several + Intel Ethernet drivers. diff --git a/drivers/net/ethernet/intel/libie/Makefile b/drivers/net/ethernet/intel/libie/Makefile new file mode 100644 index 000000000000..bf42c5aeeedd --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 Intel Corporation + +obj-$(CONFIG_LIBIE) += libie.o + +libie-objs += rx.o diff --git a/drivers/net/ethernet/intel/libie/rx.c b/drivers/net/ethernet/intel/libie/rx.c new file mode 100644 index 000000000000..38201ee1e891 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/rx.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation */ + +#include + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +/* A few supplementary definitions for when XDP hash types do not coincide + * with what can be generated from ptype definitions by means of preprocessor + * concatenation. 
+ */ +#define XDP_RSS_L3_L2 XDP_RSS_TYPE_NONE +#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE +#define XDP_RSS_L4_TIMESYNC XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_L3 XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_L4 XDP_RSS_L4 + +#define LIBIE_RX_PT(oip, ofrag, tun, tp, tefr, iprot, pl) { \ + .outer_ip = LIBETH_RX_PT_OUTER_##oip, \ + .outer_frag = LIBETH_RX_PT_##ofrag, \ + .tunnel_type = LIBETH_RX_PT_TUNNEL_IP_##tun, \ + .tunnel_end_prot = LIBETH_RX_PT_TUNNEL_END_##tp, \ + .tunnel_end_frag = LIBETH_RX_PT_##tefr, \ + .inner_prot = LIBETH_RX_PT_INNER_##iprot, \ + .payload_layer = LIBETH_RX_PT_PAYLOAD_##pl, \ + .hash_type = XDP_RSS_L3_##oip | \ + XDP_RSS_L4_##iprot | \ + XDP_RSS_TYPE_##pl, \ + } + +#define LIBIE_RX_PT_UNUSED { } + +#define __LIBIE_RX_PT_L2(iprot, pl) \ + LIBIE_RX_PT(L2, NOT_FRAG, NONE, NONE, NOT_FRAG, iprot, pl) +#define LIBIE_RX_PT_L2 __LIBIE_RX_PT_L2(NONE, L2) +#define LIBIE_RX_PT_TS __LIBIE_RX_PT_L2(TIMESYNC, L2) +#define LIBIE_RX_PT_L3 __LIBIE_RX_PT_L2(NONE, L3) + +#define LIBIE_RX_PT_IP_FRAG(oip) \ + LIBIE_RX_PT(IPV##oip, FRAG, NONE, NONE, NOT_FRAG, NONE, L3) +#define LIBIE_RX_PT_IP_L3(oip, tun, teprot, tefr) \ + LIBIE_RX_PT(IPV##oip, NOT_FRAG, tun, teprot, tefr, NONE, L3) +#define LIBIE_RX_PT_IP_L4(oip, tun, teprot, iprot) \ + LIBIE_RX_PT(IPV##oip, NOT_FRAG, tun, teprot, NOT_FRAG, iprot, L4) + +#define LIBIE_RX_PT_IP_NOF(oip, tun, ver) \ + LIBIE_RX_PT_IP_L3(oip, tun, ver, NOT_FRAG), \ + LIBIE_RX_PT_IP_L4(oip, tun, ver, UDP), \ + LIBIE_RX_PT_UNUSED, \ + LIBIE_RX_PT_IP_L4(oip, tun, ver, TCP), \ + LIBIE_RX_PT_IP_L4(oip, tun, ver, SCTP), \ + LIBIE_RX_PT_IP_L4(oip, tun, ver, ICMP) + +/* IPv oip --> tun --> IPv ver */ +#define LIBIE_RX_PT_IP_TUN_VER(oip, tun, ver) \ + LIBIE_RX_PT_IP_L3(oip, tun, ver, FRAG), \ + LIBIE_RX_PT_IP_NOF(oip, tun, ver) + +/* Non Tunneled IPv oip */ +#define LIBIE_RX_PT_IP_RAW(oip) \ + LIBIE_RX_PT_IP_FRAG(oip), \ + LIBIE_RX_PT_IP_NOF(oip, NONE, NONE) + +/* IPv oip --> tun --> { IPv4, IPv6 } */ +#define LIBIE_RX_PT_IP_TUN(oip, tun) \ + LIBIE_RX_PT_IP_TUN_VER(oip, tun, IPV4), \ + LIBIE_RX_PT_IP_TUN_VER(oip, tun, IPV6) + +/* IPv oip --> GRE/NAT tun --> { x, IPv4, IPv6 } */ +#define LIBIE_RX_PT_IP_GRE(oip, tun) \ + LIBIE_RX_PT_IP_L3(oip, tun, NONE, NOT_FRAG), \ + LIBIE_RX_PT_IP_TUN(oip, tun) + +/* Non Tunneled IPv oip + * IPv oip --> { IPv4, IPv6 } + * IPv oip --> GRE/NAT --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC/VLAN --> { x, IPv4, IPv6 } + */ +#define LIBIE_RX_PT_IP(oip) \ + LIBIE_RX_PT_IP_RAW(oip), \ + LIBIE_RX_PT_IP_TUN(oip, IP), \ + LIBIE_RX_PT_IP_GRE(oip, GRENAT), \ + LIBIE_RX_PT_IP_GRE(oip, GRENAT_MAC), \ + LIBIE_RX_PT_IP_GRE(oip, GRENAT_MAC_VLAN) + +/* Lookup table mapping for O(1) parsing */ +const struct libeth_rx_pt libie_rx_pt_lut[LIBIE_RX_PT_NUM] = { + /* L2 packet types */ + LIBIE_RX_PT_UNUSED, + LIBIE_RX_PT_L2, + LIBIE_RX_PT_TS, + LIBIE_RX_PT_L2, + LIBIE_RX_PT_UNUSED, + LIBIE_RX_PT_UNUSED, + LIBIE_RX_PT_L2, + LIBIE_RX_PT_L2, + LIBIE_RX_PT_UNUSED, + LIBIE_RX_PT_UNUSED, + LIBIE_RX_PT_L2, + LIBIE_RX_PT_UNUSED, + + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + LIBIE_RX_PT_L3, + + LIBIE_RX_PT_IP(4), + LIBIE_RX_PT_IP(6), +}; +EXPORT_SYMBOL_NS_GPL(libie_rx_pt_lut, LIBIE); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Ethernet common library"); +MODULE_IMPORT_NS(LIBETH); +MODULE_LICENSE("GPL"); diff --git a/include/linux/net/intel/libie/rx.h 
b/include/linux/net/intel/libie/rx.h new file mode 100644 index 000000000000..37c5c8f26fb9 --- /dev/null +++ b/include/linux/net/intel/libie/rx.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBIE_RX_H +#define __LIBIE_RX_H + +#include + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +#define LIBIE_RX_PT_NUM 154 + +extern const struct libeth_rx_pt libie_rx_pt_lut[LIBIE_RX_PT_NUM]; + +/** + * libie_rx_pt_parse - convert HW packet type to software bitfield structure + * @pt: 10-bit hardware packet type value from the descriptor + * + * ```libie_rx_pt_lut``` must be accessed only using this wrapper. + * + * Return: parsed bitfield struct corresponding to the provided ptype. + */ +static inline struct libeth_rx_pt libie_rx_pt_parse(u32 pt) +{ + if (unlikely(pt >= LIBIE_RX_PT_NUM)) + pt = 0; + + return libie_rx_pt_lut[pt]; +} + +#endif /* __LIBIE_RX_H */ diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h new file mode 100644 index 000000000000..0807e19f44b3 --- /dev/null +++ b/include/net/libeth/rx.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_RX_H +#define __LIBETH_RX_H + +#include + +/* Converting abstract packet type numbers into a software structure with + * the packet parameters to do O(1) lookup on Rx. + */ + +enum { + LIBETH_RX_PT_OUTER_L2 = 0U, + LIBETH_RX_PT_OUTER_IPV4, + LIBETH_RX_PT_OUTER_IPV6, +}; + +enum { + LIBETH_RX_PT_NOT_FRAG = 0U, + LIBETH_RX_PT_FRAG, +}; + +enum { + LIBETH_RX_PT_TUNNEL_IP_NONE = 0U, + LIBETH_RX_PT_TUNNEL_IP_IP, + LIBETH_RX_PT_TUNNEL_IP_GRENAT, + LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC, + LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN, +}; + +enum { + LIBETH_RX_PT_TUNNEL_END_NONE = 0U, + LIBETH_RX_PT_TUNNEL_END_IPV4, + LIBETH_RX_PT_TUNNEL_END_IPV6, +}; + +enum { + LIBETH_RX_PT_INNER_NONE = 0U, + LIBETH_RX_PT_INNER_UDP, + LIBETH_RX_PT_INNER_TCP, + LIBETH_RX_PT_INNER_SCTP, + LIBETH_RX_PT_INNER_ICMP, + LIBETH_RX_PT_INNER_TIMESYNC, +}; + +#define LIBETH_RX_PT_PAYLOAD_NONE PKT_HASH_TYPE_NONE +#define LIBETH_RX_PT_PAYLOAD_L2 PKT_HASH_TYPE_L2 +#define LIBETH_RX_PT_PAYLOAD_L3 PKT_HASH_TYPE_L3 +#define LIBETH_RX_PT_PAYLOAD_L4 PKT_HASH_TYPE_L4 + +struct libeth_rx_pt { + u32 outer_ip:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:3; + enum pkt_hash_types payload_layer:2; + + u32 pad:2; + enum xdp_rss_hash_type hash_type:16; +}; + +void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt); + +/** + * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure + * @pt: packet type params + * + * Wrapper to compile out the IPv6 code from the drivers when not supported + * by the kernel. + * + * Return: @pt.outer_ip or stub for IPv6 when not compiled-in. + */ +static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt) +{ +#if !IS_ENABLED(CONFIG_IPV6) + switch (pt.outer_ip) { + case LIBETH_RX_PT_OUTER_IPV4: + return LIBETH_RX_PT_OUTER_IPV4; + default: + return LIBETH_RX_PT_OUTER_L2; + } +#else + return pt.outer_ip; +#endif +} + +/* libeth_has_*() can be used to quickly check whether the HW metadata is + * available to avoid further expensive processing such as descriptor reads. + * They already check for the corresponding netdev feature to be enabled, + * thus can be used as drop-in replacements. 
+ */ + +static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev, + struct libeth_rx_pt pt) +{ + /* Non-zero _INNER* is only possible when _OUTER_IPV* is set, + * it is enough to check only for the L4 type. + */ + return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE && + (dev->features & NETIF_F_RXCSUM)); +} + +static inline bool libeth_rx_pt_has_hash(const struct net_device *dev, + struct libeth_rx_pt pt) +{ + return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE && + (dev->features & NETIF_F_RXHASH)); +} + +/** + * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT + * @skb: skb to fill the hash in + * @hash: 32-bit hash value from the descriptor + * @pt: packet type + */ +static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash, + struct libeth_rx_pt pt) +{ + skb_set_hash(skb, hash, pt.payload_layer); +} + +#endif /* __LIBETH_RX_H */ -- cgit v1.2.3 From 53844673d555290010dd3d6de1365af72e9839c8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:08 +0200 Subject: iavf: kill "legacy-rx" for good Ever since build_skb() became stable, the old way of allocating an skb for storing the headers separately, which are then copied into it manually, has been slower, less flexible, and thus obsolete. * It had higher pressure on MM since it actually allocates new pages, which then get split and refcount-biased (NAPI page cache); * It implies memcpy() of packet headers (40+ bytes per each frame); * the actual header length was calculated via eth_get_headlen(), which invokes Flow Dissector and thus wastes a bunch of CPU cycles; * XDP makes it even more awkward, since it has required headroom for a long time and, since multi-buffer (mbuf) landed, tailroom as well. Take a look at the ice driver, which is built around work-arounds to make XDP work with it. Even on some quite low-end hardware (not a common case for 100G NICs) it was performing worse. The only advantage "legacy-rx" had is that it didn't require any reserved headroom and tailroom. But iavf didn't use this, as it always splits pages into two halves of 2k, while that saving would only be useful when striding. And again, XDP effectively removes that sole pro. There's a train of features to land in IAVF soon: Page Pool, XDP, XSk, multi-buffer etc. Each new one would require adding more and more of this Danse Macabre for absolutely no reason, besides making hotpath less and less effective. Remove the "feature" with all the related code. This includes at least one very hot branch (typically hit on each new frame), which was either always-true or always-false at least for a complete NAPI bulk of 64 frames, the whole private flags cruft, and so on.
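For orientation before wading into the diff, the hot branch in question is the one in iavf_clean_rx_irq() (see the hunk further down); condensed, the per-frame path collapses roughly like this:

	/* before: a per-frame branch that is effectively constant
	 * for a whole NAPI bulk of frames
	 */
	if (skb)
		iavf_add_rx_frag(rx_ring, rx_buffer, skb, size);
	else if (ring_uses_build_skb(rx_ring))
		skb = iavf_build_skb(rx_ring, rx_buffer, size);
	else
		skb = iavf_construct_skb(rx_ring, rx_buffer, size);

	/* after: build_skb() is the only path left */
	if (skb)
		iavf_add_rx_frag(rx_ring, rx_buffer, skb, size);
	else
		skb = iavf_build_skb(rx_ring, rx_buffer, size);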
Some stats: Function: add/remove: 0/4 grow/shrink: 0/7 up/down: 0/-721 (-721) RO Data: add/remove: 0/1 grow/shrink: 0/0 up/down: 0/-40 (-40) Reviewed-by: Alexander Duyck Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 140 ------------------------ drivers/net/ethernet/intel/iavf/iavf_main.c | 10 +- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 82 +------------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 27 +---- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 3 +- 6 files changed, 8 insertions(+), 256 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index db8188c7ac4b..23a6557fc3db 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -287,7 +287,7 @@ struct iavf_adapter { #define IAVF_FLAG_RESET_PENDING BIT(4) #define IAVF_FLAG_RESET_NEEDED BIT(5) #define IAVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) -#define IAVF_FLAG_LEGACY_RX BIT(15) +/* BIT(15) is free, was IAVF_FLAG_LEGACY_RX */ #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 378c3e9ddf9d..52273f7eab2c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -240,29 +240,6 @@ static const struct iavf_stats iavf_gstrings_stats[] = { #define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(iavf_gstrings_queue_stats) -/* For now we have one and only one private flag and it is only defined - * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead - * of leaving all this code sitting around empty we will strip it unless - * our one private flag is actually available. 
- */ -struct iavf_priv_flags { - char flag_string[ETH_GSTRING_LEN]; - u32 flag; - bool read_only; -}; - -#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ - .flag_string = _name, \ - .flag = _flag, \ - .read_only = _read_only, \ -} - -static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { - IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), -}; - -#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) - /** * iavf_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -342,8 +319,6 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) return IAVF_STATS_LEN + (IAVF_QUEUE_STATS_LEN * 2 * netdev->real_num_tx_queues); - else if (sset == ETH_SS_PRIV_FLAGS) - return IAVF_PRIV_FLAGS_STR_LEN; else return -EINVAL; } @@ -385,21 +360,6 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, rcu_read_unlock(); } -/** - * iavf_get_priv_flag_strings - Get private flag strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the private flags string table - **/ -static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) - ethtool_puts(&data, iavf_gstrings_priv_flags[i].flag_string); -} - /** * iavf_get_stat_strings - Get stat strings * @netdev: network interface device structure @@ -438,108 +398,11 @@ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) case ETH_SS_STATS: iavf_get_stat_strings(netdev, data); break; - case ETH_SS_PRIV_FLAGS: - iavf_get_priv_flag_strings(netdev, data); - break; default: break; } } -/** - * iavf_get_priv_flags - report device private flags - * @netdev: network interface device structure - * - * The get string set count and the string set should be matched for each - * flag returned. Add new strings for each flag to the iavf_gstrings_priv_flags - * array. - * - * Returns a u32 bitmap of flags. - **/ -static u32 iavf_get_priv_flags(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 i, ret_flags = 0; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (priv_flags->flag & adapter->flags) - ret_flags |= BIT(i); - } - - return ret_flags; -} - -/** - * iavf_set_priv_flags - set private flags - * @netdev: network interface device structure - * @flags: bit flags to be set - **/ -static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 orig_flags, new_flags, changed_flags; - int ret = 0; - u32 i; - - orig_flags = READ_ONCE(adapter->flags); - new_flags = orig_flags; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); - - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) - return -EOPNOTSUPP; - } - - /* Before we finalize any flag changes, any checks which we need to - * perform to determine if the new flags will be supported should go - * here... - */ - - /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg returns anything but the old value, this means - * something else must have modified the flags variable since we - * copied it. 
We'll just punt with an error and log something in the - * message buffer. - */ - if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) { - dev_warn(&adapter->pdev->dev, - "Unable to update adapter->flags as it was modified by another thread...\n"); - return -EAGAIN; - } - - changed_flags = orig_flags ^ new_flags; - - /* Process any additional changes needed as a result of flag changes. - * The changed_flags value reflects the list of bits that were changed - * in the code above. - */ - - /* issue a reset to force legacy-rx change to take effect */ - if (changed_flags & IAVF_FLAG_LEGACY_RX) { - if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); - } - } - - return ret; -} - /** * iavf_get_msglevel - Get debug message level * @netdev: network interface device structure @@ -585,7 +448,6 @@ static void iavf_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->driver, iavf_driver_name, 32); strscpy(drvinfo->fw_version, "N/A", 4); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); - drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN; } /** @@ -1995,8 +1857,6 @@ static const struct ethtool_ops iavf_ethtool_ops = { .get_strings = iavf_get_strings, .get_ethtool_stats = iavf_get_ethtool_stats, .get_sset_count = iavf_get_sset_count, - .get_priv_flags = iavf_get_priv_flags, - .set_priv_flags = iavf_set_priv_flags, .get_msglevel = iavf_get_msglevel, .set_msglevel = iavf_set_msglevel, .get_coalesce = iavf_get_coalesce, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d6cbe5022815..5eb7379956e4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -719,9 +719,7 @@ static void iavf_configure_rx(struct iavf_adapter *adapter) struct iavf_hw *hw = &adapter->hw; int i; - /* Legacy Rx will always default to a 2048 buffer size. */ -#if (PAGE_SIZE < 8192) - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { + if (PAGE_SIZE < 8192) { struct net_device *netdev = adapter->netdev; /* For jumbo frames on systems with 4K pages we have to use @@ -738,16 +736,10 @@ static void iavf_configure_rx(struct iavf_adapter *adapter) (netdev->mtu <= ETH_DATA_LEN)) rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; } -#endif for (i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); adapter->rx_rings[i].rx_buf_len = rx_buf_len; - - if (adapter->flags & IAVF_FLAG_LEGACY_RX) - clear_ring_build_skb_enabled(&adapter->rx_rings[i]); - else - set_ring_build_skb_enabled(&adapter->rx_rings[i]); } } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 33ec01d0ed67..a908b394409d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -824,17 +824,6 @@ static void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -/** - * iavf_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int iavf_rx_offset(struct iavf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? 
IAVF_SKB_PAD : 0; -} - /** * iavf_alloc_mapped_page - recycle or make a new page * @rx_ring: ring to use @@ -879,7 +868,7 @@ static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = iavf_rx_offset(rx_ring); + bi->page_offset = IAVF_SKB_PAD; /* initialize pagecnt_bias to 1 representing we fully own page */ bi->pagecnt_bias = 1; @@ -1218,7 +1207,7 @@ static void iavf_add_rx_frag(struct iavf_ring *rx_ring, #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); + unsigned int truesize = SKB_DATA_ALIGN(size + IAVF_SKB_PAD); #endif if (!size) @@ -1266,69 +1255,6 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, return rx_buffer; } -/** - * iavf_construct_skb - Allocate skb and populate it - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. - */ -static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) -{ - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif - unsigned int headlen; - struct sk_buff *skb; - - if (!rx_buffer) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - rx_buffer->page_offset + headlen, - size, truesize); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - /* buffer is unused, reset bias back to rx_buffer */ - rx_buffer->pagecnt_bias++; - } - - return skb; -} - /** * iavf_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on @@ -1500,10 +1426,8 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ if (skb) iavf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = iavf_build_skb(rx_ring, rx_buffer, size); else - skb = iavf_construct_skb(rx_ring, rx_buffer, size); + skb = iavf_build_skb(rx_ring, rx_buffer, size); /* exit if we failed to retrieve a buffer */ if (!skb) { diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 10ba36602c0c..68543efdd29b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -81,20 +81,11 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) /* Supported Rx Buffer Sizes (a multiple of 128) */ -#define 
IAVF_RXBUFFER_256 256 #define IAVF_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ #define IAVF_RXBUFFER_2048 2048 #define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ #define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ -/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) - * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) - */ -#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256 #define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) #define iavf_rx_desc iavf_32byte_rx_desc @@ -361,7 +352,8 @@ struct iavf_ring { u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) -#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) +/* BIT(1) is free, was IAVF_RXR_FLAGS_BUILD_SKB_ENABLED */ +/* BIT(2) is free */ #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) #define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5) @@ -392,21 +384,6 @@ struct iavf_ring { */ } ____cacheline_internodealigned_in_smp; -static inline bool ring_uses_build_skb(struct iavf_ring *ring) -{ - return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED); -} - -static inline void set_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} - -static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} - #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 #define IAVF_ITR_ADAPTIVE_MIN_USECS 0x0002 #define IAVF_ITR_ADAPTIVE_MAX_USECS 0x007e diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 22f2df7c460b..a31df5af0473 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -289,8 +289,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) return; /* Limit maximum frame size when jumbo frames is not enabled */ - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) && - (adapter->netdev->mtu <= ETH_DATA_LEN)) + if (adapter->netdev->mtu <= ETH_DATA_LEN) max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; vqci->vsi_id = adapter->vsi_res->vsi_id; -- cgit v1.2.3 From 920d86f3c5529d658bb9576ae9120a2330d9b220 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:09 +0200 Subject: iavf: drop page splitting and recycling As an intermediate step, remove all page splitting/recycling code. Just always allocate a new page and don't touch its refcount, so that it gets freed by the core stack later. Same for the "in-place" recycling, i.e. when an unused buffer gets assigned to a first needs-refilling descriptor. In some cases, this was leading to moving up to 63 &iavf_rx_buf structures around the ring on a per-field basis -- not something wanted on hotpath. The change allows to greatly simplify certain parts of the code: Function: add/remove: 0/2 grow/shrink: 0/7 up/down: 0/-744 (-744) Although the array of &iavf_rx_buf is barely used now and could be replaced with just page pointer array, don't touch it now to not complicate replacing it with libie Rx buffer struct later on. No surprise perf loses up to 30% here, but that regression will go away once PP lands. Note that iavf_rx_pg_*() definitions are left to reduce diffstat. 
They will be removed with the conversion to Page Pool. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 24 +--- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 152 ++---------------------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 65 ---------- drivers/net/ethernet/intel/iavf/iavf_type.h | 2 - drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 8 +- 5 files changed, 10 insertions(+), 241 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5eb7379956e4..ffb71a62b105 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -715,32 +715,10 @@ static void iavf_configure_tx(struct iavf_adapter *adapter) **/ static void iavf_configure_rx(struct iavf_adapter *adapter) { - unsigned int rx_buf_len = IAVF_RXBUFFER_2048; struct iavf_hw *hw = &adapter->hw; - int i; - - if (PAGE_SIZE < 8192) { - struct net_device *netdev = adapter->netdev; - /* For jumbo frames on systems with 4K pages we have to use - * an order 1 page, so we might as well increase the size - * of our Rx buffer to make better use of the available space - */ - rx_buf_len = IAVF_RXBUFFER_3072; - - /* We use a 1536 buffer size for configurations with - * standard Ethernet mtu. On x86 this gives us enough room - * for shared info and 192 bytes of padding. - */ - if (!IAVF_2K_TOO_SMALL_WITH_PADDING && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - } - - for (i = 0; i < adapter->num_active_queues; i++) { + for (u32 i = 0; i < adapter->num_active_queues; i++) adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); - adapter->rx_rings[i].rx_buf_len = rx_buf_len; - } } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index a908b394409d..c7602c173420 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -715,7 +715,7 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_bi->dma, rx_bi->page_offset, - rx_ring->rx_buf_len, + IAVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* free resources associated with mapping */ @@ -724,7 +724,7 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); + __free_page(rx_bi->page); rx_bi->page = NULL; rx_bi->page_offset = 0; @@ -736,7 +736,6 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -792,7 +791,6 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) goto err; } - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -812,9 +810,6 @@ static void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. 
(Only * applicable for weak-ordered memory model archs, @@ -838,12 +833,6 @@ static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, struct page *page = bi->page; dma_addr_t dma; - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) { - rx_ring->rx_stats.page_reuse_count++; - return true; - } - /* alloc new page for storage */ page = dev_alloc_pages(iavf_rx_pg_order(rx_ring)); if (unlikely(!page)) { @@ -870,9 +859,6 @@ static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, bi->page = page; bi->page_offset = IAVF_SKB_PAD; - /* initialize pagecnt_bias to 1 representing we fully own page */ - bi->pagecnt_bias = 1; - return true; } @@ -924,7 +910,7 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - rx_ring->rx_buf_len, + IAVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -1102,91 +1088,6 @@ static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) return false; } -/** - * iavf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void iavf_reuse_rx_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *old_buff) -{ - struct iavf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** - * iavf_can_reuse_rx_page - Determine if this page can be reused by - * the adapter for another receive - * - * @rx_buffer: buffer containing the page - * - * If page is reusable, rx_buffer->page_offset is adjusted to point to - * an unused region in the page. - * - * For small pages, @truesize will be a constant value, half the size - * of the memory at page. We'll attempt to alternate between high and - * low halves of the page, with one half ready for use by the hardware - * and the other half being consumed by the stack. We use the page - * ref count to determine whether the stack has finished consuming the - * portion of this page that was passed up with a previous packet. If - * the page ref count is >1, we'll assume the "other" half page is - * still busy, and this page cannot be reused. - * - * For larger pages, @truesize will be the actual space used by the - * received packet (adjusted upward to an even multiple of the cache - * line size). This will advance through the page by the amount - * actually consumed by the received packets while there is still - * space for a buffer. Each region of larger pages will be used at - * most once, after which the page will not be reused. - * - * In either case, if the page is reusable its refcount is increased. - **/ -static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* Is any reuse possible? 
*/ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IAVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048) - if (rx_buffer->page_offset > IAVF_LAST_OFFSET) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -1204,24 +1105,13 @@ static void iavf_add_rx_frag(struct iavf_ring *rx_ring, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(size + IAVF_SKB_PAD); -#endif if (!size) return; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif } /** @@ -1249,9 +1139,6 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, size, DMA_FROM_DEVICE); - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buffer->pagecnt_bias--; - return rx_buffer; } @@ -1269,12 +1156,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, unsigned int size) { void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + SKB_DATA_ALIGN(IAVF_SKB_PAD + size); -#endif struct sk_buff *skb; if (!rx_buffer || !size) @@ -1292,23 +1175,15 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, skb_reserve(skb, IAVF_SKB_PAD); __skb_put(skb, size); - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - return skb; } /** - * iavf_put_rx_buffer - Clean up used buffer and either recycle or free + * iavf_put_rx_buffer - Unmap used buffer * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. + * This function will unmap the buffer after it's written by HW. 
*/ static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer) @@ -1316,18 +1191,9 @@ static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, if (!rx_buffer) return; - if (iavf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - iavf_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); /* clear contents of buffer_info */ rx_buffer->page = NULL; @@ -1432,8 +1298,6 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; - if (rx_buffer && size) - rx_buffer->pagecnt_bias++; break; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 68543efdd29b..e01777531635 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -81,8 +81,6 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) /* Supported Rx Buffer Sizes (a multiple of 128) */ -#define IAVF_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ -#define IAVF_RXBUFFER_2048 2048 #define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ #define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ @@ -92,57 +90,7 @@ enum iavf_dyn_idx_t { #define IAVF_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define IAVF_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048)) - -static inline int iavf_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int iavf_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. 
- */ - if (IAVF_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = IAVF_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return iavf_compute_pad(rx_buf_len); -} - -#define IAVF_SKB_PAD iavf_skb_pad() -#else -#define IAVF_2K_TOO_SMALL_WITH_PADDING false #define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif /** * iavf_test_staterr - tests bits in Rx descriptor status and error fields @@ -265,12 +213,7 @@ struct iavf_tx_buffer { struct iavf_rx_buffer { dma_addr_t dma; struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; }; struct iavf_queue_stats { @@ -292,8 +235,6 @@ struct iavf_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; - u64 page_reuse_count; - u64 realloc_count; }; enum iavf_ring_state_t { @@ -337,7 +278,6 @@ struct iavf_ring { u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ - u16 rx_buf_len; /* used in interrupt processing */ u16 next_to_use; @@ -373,7 +313,6 @@ struct iavf_ring { struct iavf_q_vector *q_vector; /* Backreference to associated vector */ struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must * return before it sees the EOP for * the current packet, we save that skb @@ -407,10 +346,6 @@ struct iavf_ring_container { static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) { -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index 23ded4fcd94f..f6b09e57abce 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -10,8 +10,6 @@ #include "iavf_adminq.h" #include "iavf_devids.h" -#define IAVF_RXQ_CTX_DBUFF_SHIFT 7 - /* IAVF_MASK is a macro used on 32 bit registers */ #define IAVF_MASK(mask, shift) ((u32)(mask) << (shift)) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a31df5af0473..f8e9f859a4f1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -288,10 +288,6 @@ void iavf_configure_queues(struct iavf_adapter *adapter) if (!vqci) return; - /* Limit maximum frame size when jumbo frames is not enabled */ - if (adapter->netdev->mtu <= ETH_DATA_LEN) - max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - vqci->vsi_id = adapter->vsi_res->vsi_id; vqci->num_queue_pairs = pairs; vqpi = vqci->qpair; @@ -308,9 +304,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) vqpi->rxq.ring_len = adapter->rx_rings[i].count; vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = - ALIGN(adapter->rx_rings[i].rx_buf_len, - BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT)); + vqpi->rxq.databuffer_size = IAVF_RXBUFFER_3072; if (CRC_OFFLOAD_ALLOWED(adapter)) vqpi->rxq.crc_disable = !!(adapter->netdev->features & NETIF_F_RXFCS); -- cgit v1.2.3 From e6c91556b97f855436fa45f75e69165d671012a7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:13 +0200 Subject: libeth: add Rx buffer management Add a couple intuitive helpers to hide Rx buffer implementation details in the library and not multiplicate it between drivers. 
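From a driver's point of view, the intended usage is roughly the following; this is a minimal sketch based on the helpers added below and on the iavf conversion later in the series. Error handling and the driver's own ring plumbing are omitted, and ring, q_vector, rx_desc, ntu, ntc, size and err are placeholders for whatever the driver already has:

	/* 1) queue setup: creates the page_pool and the FQE array */
	struct libeth_fq fq = {
		.count	 = ring->count,			/* one FQE per descriptor */
		.buf_len = LIBIE_MAX_RX_BUF_LEN,	/* per-HW cap, from libie */
		.nid	 = NUMA_NO_NODE,
	};

	err = libeth_rx_fq_create(&fq, &q_vector->napi);

	/* 2) refill: the returned DMA address goes straight to the HW */
	dma_addr_t addr = libeth_rx_alloc(&fq.fp, ntu);

	if (addr != DMA_MAPPING_ERROR)
		rx_desc->read.pkt_addr = cpu_to_le64(addr);

	/* 3) completion: sync for CPU (an empty buffer is recycled right
	 * away), then build/extend the skb from fq.fqes[ntc].page as the
	 * iavf conversion below does
	 */
	if (libeth_rx_sync_for_cpu(&fq.fqes[ntc], size))
		skb = iavf_build_skb(&fq.fqes[ntc], size);

	/* 4) teardown */
	libeth_rx_fq_destroy(&fq);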
The settings are sorta optimized for 100G+ NICs, but nothing really HW-specific here. Use the new page_pool_dev_alloc() to dynamically switch between split-page and full-page modes depending on MTU, page size, required headroom etc. For example, on x86_64 with the default driver settings each page is shared between 2 buffers. Turning on XDP (not in this series) increases the headroom requirement, which pushes truesize past the 2048 boundary, so each buffer starts getting a full page. The "ceiling" limit is %PAGE_SIZE, as only order-0 pages are used to avoid compound overhead. For the above architecture, this means maximum linear frame size of 3712 w/o XDP. Note that &libeth_fq is not a complete queue/ring structure for now, but rather a shim; eventually the libeth-enabled drivers will move to it, with iavf being the first one. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/libeth/Kconfig | 1 + drivers/net/ethernet/intel/libeth/rx.c | 98 +++++++++++++++++++++++++ include/net/libeth/rx.h | 117 ++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/libeth/Kconfig b/drivers/net/ethernet/intel/libeth/Kconfig index af970a63c227..480293b71dbc 100644 --- a/drivers/net/ethernet/intel/libeth/Kconfig +++ b/drivers/net/ethernet/intel/libeth/Kconfig @@ -3,6 +3,7 @@ config LIBETH tristate + select PAGE_POOL help libeth is a common library containing routines shared between several drivers, but not yet promoted to the generic kernel API. diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index 879c4dfd6a4e..6221b88c34ac 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -3,6 +3,104 @@ #include +/* Rx buffer management */ + +/** + * libeth_rx_hw_len - get the actual buffer size to be passed to HW + * @pp: &page_pool_params of the netdev to calculate the size for + * @max_len: maximum buffer size for a single descriptor + * + * Return: HW-writeable length per one buffer to pass it to the HW accounting: + * MTU the @dev has, HW required alignment, minimum and maximum allowed values, + * and system's page size. + */ +static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len) +{ + u32 len; + + len = READ_ONCE(pp->netdev->mtu) + LIBETH_RX_LL_LEN; + len = ALIGN(len, LIBETH_RX_BUF_STRIDE); + len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE), + pp->max_len); + + return len; +} + +/** + * libeth_rx_fq_create - create a PP with the default libeth settings + * @fq: buffer queue struct to fill + * @napi: &napi_struct covering this PP (no usage outside its poll loops) + * + * Return: %0 on success, -%errno on failure.
+ */ +int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi) +{ + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = LIBETH_RX_PAGE_ORDER, + .pool_size = fq->count, + .nid = fq->nid, + .dev = napi->dev->dev.parent, + .netdev = napi->dev, + .napi = napi, + .dma_dir = DMA_FROM_DEVICE, + .offset = LIBETH_SKB_HEADROOM, + }; + struct libeth_fqe *fqes; + struct page_pool *pool; + + /* HW-writeable / syncable length per one page */ + pp.max_len = LIBETH_RX_PAGE_LEN(pp.offset); + + /* HW-writeable length per buffer */ + fq->buf_len = libeth_rx_hw_len(&pp, fq->buf_len); + /* Buffer size to allocate */ + fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset + + fq->buf_len)); + + pool = page_pool_create(&pp); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + fqes = kvcalloc_node(fq->count, sizeof(*fqes), GFP_KERNEL, fq->nid); + if (!fqes) + goto err_buf; + + fq->fqes = fqes; + fq->pp = pool; + + return 0; + +err_buf: + page_pool_destroy(pool); + + return -ENOMEM; +} +EXPORT_SYMBOL_NS_GPL(libeth_rx_fq_create, LIBETH); + +/** + * libeth_rx_fq_destroy - destroy a &page_pool created by libeth + * @fq: buffer queue to process + */ +void libeth_rx_fq_destroy(struct libeth_fq *fq) +{ + kvfree(fq->fqes); + page_pool_destroy(fq->pp); +} +EXPORT_SYMBOL_NS_GPL(libeth_rx_fq_destroy, LIBETH); + +/** + * libeth_rx_recycle_slow - recycle a libeth page from the NAPI context + * @page: page to recycle + * + * To be used on exceptions or rare cases not requiring fast inline recycling. + */ +void libeth_rx_recycle_slow(struct page *page) +{ + page_pool_recycle_direct(page->pp, page); +} +EXPORT_SYMBOL_NS_GPL(libeth_rx_recycle_slow, LIBETH); + /* Converting abstract packet type numbers into a software structure with * the packet parameters to do O(1) lookup on Rx. */ diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index 0807e19f44b3..f29ea3e34c6c 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -4,8 +4,125 @@ #ifndef __LIBETH_RX_H #define __LIBETH_RX_H +#include + +#include #include +/* Rx buffer management */ + +/* Space reserved in front of each frame */ +#define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +/* Maximum headroom for worst-case calculations */ +#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM +/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ +#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + +/* Always use order-0 pages */ +#define LIBETH_RX_PAGE_ORDER 0 +/* Pick a sane buffer stride and align to a cacheline boundary */ +#define LIBETH_RX_BUF_STRIDE SKB_DATA_ALIGN(128) +/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */ +#define LIBETH_RX_PAGE_LEN(hr) \ + ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER), \ + LIBETH_RX_BUF_STRIDE) + +/** + * struct libeth_fqe - structure representing an Rx buffer (fill queue element) + * @page: page holding the buffer + * @offset: offset from the page start (to the headroom) + * @truesize: total space occupied by the buffer (w/ headroom and tailroom) + * + * Depending on the MTU, API switches between one-page-per-frame and shared + * page model (to conserve memory on bigger-page platforms). In case of the + * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```. 
+ */ +struct libeth_fqe { + struct page *page; + u32 offset; + u32 truesize; +} __aligned_largest; + +/** + * struct libeth_fq - structure representing a buffer (fill) queue + * @fp: hotpath part of the structure + * @pp: &page_pool for buffer management + * @fqes: array of Rx buffers + * @truesize: size to allocate per buffer, w/overhead + * @count: number of descriptors/buffers the queue has + * @buf_len: HW-writeable length per each buffer + * @nid: ID of the closest NUMA node with memory + */ +struct libeth_fq { + struct_group_tagged(libeth_fq_fp, fp, + struct page_pool *pp; + struct libeth_fqe *fqes; + + u32 truesize; + u32 count; + ); + + /* Cold fields */ + u32 buf_len; + int nid; +}; + +int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi); +void libeth_rx_fq_destroy(struct libeth_fq *fq); + +/** + * libeth_rx_alloc - allocate a new Rx buffer + * @fq: fill queue to allocate for + * @i: index of the buffer within the queue + * + * Return: DMA address to be passed to HW for Rx on successful allocation, + * ```DMA_MAPPING_ERROR``` otherwise. + */ +static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i) +{ + struct libeth_fqe *buf = &fq->fqes[i]; + + buf->truesize = fq->truesize; + buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize); + if (unlikely(!buf->page)) + return DMA_MAPPING_ERROR; + + return page_pool_get_dma_addr(buf->page) + buf->offset + + fq->pp->p.offset; +} + +void libeth_rx_recycle_slow(struct page *page); + +/** + * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA + * @fqe: buffer to process + * @len: frame length from the descriptor + * + * Process the buffer after it's written by HW. The regular path is to + * synchronize DMA for CPU, but in case of no data it will be immediately + * recycled back to its PP. + * + * Return: true when there's data to process, false otherwise. + */ +static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe, + u32 len) +{ + struct page *page = fqe->page; + + /* Very rare, but possible case. The most common reason: + * the last fragment contained FCS only, which was then + * stripped by the HW. + */ + if (unlikely(!len)) { + libeth_rx_recycle_slow(page); + return false; + } + + page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len); + + return true; +} + /* Converting abstract packet type numbers into a software structure with * the packet parameters to do O(1) lookup on Rx. */ -- cgit v1.2.3 From 97cadd3d3ce3df32647011233a35a1597bb7a55b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:14 +0200 Subject: iavf: pack iavf_ring more efficiently Before replacing the Rx buffer management with libie, clean up &iavf_ring a bit. There are several fields not used anywhere in the code -- simply remove them. Move ::tail up to remove a hole. Replace ::arm_wb boolean with 1-bit flag in ::flags to free 1 more byte. Finally, move ::prev_pkt_ctr out of &iavf_tx_queue_stats -- it doesn't belong there (used for Tx stall detection). Place it next to the stats on the ring itself to fill the 4-byte slot. The result: no holes and all the hot fields fit into the first 64-byte cacheline. 
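If desired, the cacheline claim could be encoded as a build-time check along these lines (hypothetical, not part of this patch):

	/* hypothetical check: the hot fields up to and including ::flags
	 * must stay within the first 64-byte cacheline after the repack
	 */
	static_assert(offsetofend(struct iavf_ring, flags) <= 64);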
Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 12 ++++++------ drivers/net/ethernet/intel/iavf/iavf_txrx.h | 22 +++------------------- 2 files changed, 9 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index c7602c173420..2ec68b51bebe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -185,7 +185,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * pending work. */ packets = tx_ring->stats.packets & INT_MAX; - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + if (tx_ring->prev_pkt_ctr == packets) { iavf_force_wb(vsi, tx_ring->q_vector); continue; } @@ -194,7 +194,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * to iavf_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt_ctr = + tx_ring->prev_pkt_ctr = iavf_get_tx_pending(tx_ring, true) ? packets : -1; } } @@ -320,7 +320,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__IAVF_VSI_DOWN, vsi->state) && (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; + tx_ring->flags |= IAVF_TXR_FLAGS_ARM_WB; } /* notify netdev of completed buffers */ @@ -675,7 +675,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt_ctr = -1; + tx_ring->prev_pkt_ctr = -1; return 0; err: @@ -1491,8 +1491,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) clean_complete = false; continue; } - arm_wb |= ring->arm_wb; - ring->arm_wb = false; + arm_wb |= !!(ring->flags & IAVF_TXR_FLAGS_ARM_WB); + ring->flags &= ~IAVF_TXR_FLAGS_ARM_WB; } /* Handle case where we are called by netpoll with a budget of 0 */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index e01777531635..ed559fa6f214 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -227,7 +227,6 @@ struct iavf_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; - int prev_pkt_ctr; u64 tx_lost_interrupt; }; @@ -237,12 +236,6 @@ struct iavf_rx_queue_stats { u64 alloc_buff_failed; }; -enum iavf_ring_state_t { - __IAVF_TX_FDIR_INIT_DONE, - __IAVF_TX_XPS_INIT_DONE, - __IAVF_RING_STATE_NBITS /* must be last */ -}; - /* some useful defines for virtchannel interface, which * is the only remaining user of header split */ @@ -264,10 +257,8 @@ struct iavf_ring { struct iavf_tx_buffer *tx_bi; struct iavf_rx_buffer *rx_bi; }; - DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS); - u16 queue_index; /* Queue number of ring */ - u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + u16 queue_index; /* Queue number of ring */ /* high bit set means dynamic, use accessors routines to read/write. * hardware only supports 2us resolution for the ITR registers. 
@@ -277,22 +268,14 @@ struct iavf_ring { u16 itr_setting; u16 count; /* Number of descriptors */ - u16 reg_idx; /* HW register index of the ring */ /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u8 atr_sample_rate; - u8 atr_count; - - bool ring_active; /* is ring online or not */ - bool arm_wb; /* do something to arm write back */ - u8 packet_stride; - u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) -/* BIT(1) is free, was IAVF_RXR_FLAGS_BUILD_SKB_ENABLED */ +#define IAVF_TXR_FLAGS_ARM_WB BIT(1) /* BIT(2) is free */ #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) @@ -306,6 +289,7 @@ struct iavf_ring { struct iavf_rx_queue_stats rx_stats; }; + int prev_pkt_ctr; /* For Tx stall detection */ unsigned int size; /* length of descriptor ring in bytes */ dma_addr_t dma; /* physical address of ring */ -- cgit v1.2.3 From 5fa4caff59f251bf9f766fc48c9f0a774a9216a0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Apr 2024 13:36:15 +0200 Subject: iavf: switch to Page Pool Now that the IAVF driver simply uses dev_alloc_page() + free_page() with no custom recycling logic, it can easily be switched to using Page Pool / libeth API instead. This allows removing the whole dance around headroom, HW buffer size, and page order. All DMA-for-device is now done in the PP core, for-CPU -- in the libeth helper. Use skb_mark_for_recycle() to bring back the recycling and restore the performance. Speaking of performance: on par with the baseline and faster with the PP optimization series applied. But the memory usage for 1500b MTU is now almost 2x lower (x86_64) thanks to allocating a page only every second descriptor. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 7 +- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 257 +++++++----------------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 34 +--- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 10 +- include/linux/net/intel/libie/rx.h | 17 ++ 5 files changed, 111 insertions(+), 214 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ffb71a62b105..7e0de0a9b883 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation.
*/ +#include + #include "iavf.h" #include "iavf_prototype.h" /* All iavf tracepoints are defined by the include below, which must @@ -45,6 +47,7 @@ MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); MODULE_ALIAS("i40evf"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver"); +MODULE_IMPORT_NS(LIBETH); MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); @@ -1586,7 +1589,6 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) rx_ring = &adapter->rx_rings[i]; rx_ring->queue_index = i; rx_ring->netdev = adapter->netdev; - rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; rx_ring->itr_setting = IAVF_ITR_RX_DEF; } @@ -2613,9 +2615,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - /* MTU range: 68 - 9710 */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; + netdev->max_mtu = LIBIE_MAX_MTU; if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 2ec68b51bebe..26b424fd6718 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -690,11 +690,8 @@ err: **/ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { - unsigned long bi_size; - u16 i; - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_bi) + if (!rx_ring->rx_fqes) return; if (rx_ring->skb) { @@ -702,40 +699,16 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) rx_ring->skb = NULL; } - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; + /* Free all the Rx ring buffers */ + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i]; - if (!rx_bi->page) - continue; + page_pool_put_full_page(rx_ring->pp, rx_fqes->page, false); - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - rx_bi->page_offset, - IAVF_RXBUFFER_3072, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - __free_page(rx_bi->page); - - rx_bi->page = NULL; - rx_bi->page_offset = 0; + if (unlikely(++i == rx_ring->count)) + i = 0; } - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_bi, 0, bi_size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -748,15 +721,22 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) **/ void iavf_free_rx_resources(struct iavf_ring *rx_ring) { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + iavf_clean_rx_ring(rx_ring); - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; if (rx_ring->desc) { - dma_free_coherent(rx_ring->dev, rx_ring->size, + dma_free_coherent(rx_ring->pp->p.dev, rx_ring->size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; } /** @@ -767,26 +747,32 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) **/ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) { - struct device *dev = rx_ring->dev; - int bi_size; - - /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_bi); - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); - if (!rx_ring->rx_bi) - goto err; + struct libeth_fq fq = { + .count = rx_ring->count, + .buf_len = LIBIE_MAX_RX_BUF_LEN, + .nid = NUMA_NO_NODE, + }; + int ret; + + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { - dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + dev_info(fq.pp->p.dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", rx_ring->size); goto err; } @@ -795,9 +781,12 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) rx_ring->next_to_use = 0; return 0; + err: - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + return -ENOMEM; } @@ -819,49 +808,6 @@ static void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -/** - * iavf_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buffer struct to modify - * - * Returns true if the page was successfully allocated or - * reused. 
- **/ -static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* alloc new page for storage */ - page = dev_alloc_pages(iavf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, iavf_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = IAVF_SKB_PAD; - - return true; -} - /** * iavf_receive_skb - Send a completed packet up the stack * @rx_ring: rx ring in play @@ -892,38 +838,37 @@ static void iavf_receive_skb(struct iavf_ring *rx_ring, **/ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) { + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; u16 ntu = rx_ring->next_to_use; union iavf_rx_desc *rx_desc; - struct iavf_rx_buffer *bi; /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; rx_desc = IAVF_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_bi[ntu]; do { - if (!iavf_alloc_mapped_page(rx_ring, bi)) - goto no_buffers; + dma_addr_t addr; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - IAVF_RXBUFFER_3072, - DMA_FROM_DEVICE); + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) + goto no_buffers; /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = IAVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_bi; ntu = 0; } @@ -942,6 +887,8 @@ no_buffers: if (rx_ring->next_to_use != ntu) iavf_release_rx_desc(rx_ring, ntu); + rx_ring->rx_stats.alloc_page_failed++; + /* make sure to come back via polling to try again after * allocation failure */ @@ -1090,9 +1037,8 @@ static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) /** * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add * @skb: sk_buff to place the data into + * @rx_buffer: buffer containing page to add * @size: packet length from rx_desc * * This function will add the data contained in rx_buffer->page to the skb. @@ -1100,105 +1046,49 @@ static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) * * The function will then update the page offset. 
**/ -static void iavf_add_rx_frag(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - struct sk_buff *skb, +static void iavf_add_rx_frag(struct sk_buff *skb, + const struct libeth_fqe *rx_buffer, unsigned int size) { - unsigned int truesize = SKB_DATA_ALIGN(size + IAVF_SKB_PAD); - - if (!size) - return; + u32 hr = rx_buffer->page->pp->p.offset; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); -} - -/** - * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use - * @rx_ring: rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, - const unsigned int size) -{ - struct iavf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - if (!size) - return rx_buffer; - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - return rx_buffer; + rx_buffer->offset + hr, size, rx_buffer->truesize); } /** * iavf_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on * @rx_buffer: Rx buffer to pull data from * @size: size of buffer to add to skb * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ -static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, +static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer, unsigned int size) { - void *va; - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IAVF_SKB_PAD + size); + u32 hr = rx_buffer->page->pp->p.offset; struct sk_buff *skb; + void *va; - if (!rx_buffer || !size) - return NULL; /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); + va = page_address(rx_buffer->page) + rx_buffer->offset; + net_prefetch(va + hr); /* build an skb around the page buffer */ - skb = napi_build_skb(va - IAVF_SKB_PAD, truesize); + skb = napi_build_skb(va, rx_buffer->truesize); if (unlikely(!skb)) return NULL; + skb_mark_for_recycle(skb); + /* update pointers within the skb to store the data */ - skb_reserve(skb, IAVF_SKB_PAD); + skb_reserve(skb, hr); __skb_put(skb, size); return skb; } -/** - * iavf_put_rx_buffer - Unmap used buffer - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will unmap the buffer after it's written by HW. 
- */ -static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer) -{ - if (!rx_buffer) - return; - - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, - DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - - /* clear contents of buffer_info */ - rx_buffer->page = NULL; -} - /** * iavf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed @@ -1252,7 +1142,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) bool failure = false; while (likely(total_rx_packets < (unsigned int)budget)) { - struct iavf_rx_buffer *rx_buffer; + struct libeth_fqe *rx_buffer; union iavf_rx_desc *rx_desc; unsigned int size; u16 vlan_tag = 0; @@ -1287,13 +1177,16 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword); iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = iavf_get_rx_buffer(rx_ring, size); + + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + if (!libeth_rx_sync_for_cpu(rx_buffer, size)) + goto skip_data; /* retrieve a buffer from the ring */ if (skb) - iavf_add_rx_frag(rx_ring, rx_buffer, skb, size); + iavf_add_rx_frag(skb, rx_buffer, size); else - skb = iavf_build_skb(rx_ring, rx_buffer, size); + skb = iavf_build_skb(rx_buffer, size); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1301,10 +1194,10 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) break; } - iavf_put_rx_buffer(rx_ring, rx_buffer); +skip_data: cleaned_count++; - if (iavf_is_non_eop(rx_ring, rx_desc, skb)) + if (iavf_is_non_eop(rx_ring, rx_desc, skb) || unlikely(!skb)) continue; /* ERR_MASK will only have valid bits if EOP set, and diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index ed559fa6f214..d7b5587aeb8e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -80,18 +80,8 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) -/* Supported Rx Buffer Sizes (a multiple of 128) */ -#define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ -#define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ - -#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) #define iavf_rx_desc iavf_32byte_rx_desc -#define IAVF_RX_DMA_ATTR \ - (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) - -#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) - /** * iavf_test_staterr - tests bits in Rx descriptor status and error fields * @rx_desc: pointer to receive descriptor (in le64 format) @@ -210,12 +200,6 @@ struct iavf_tx_buffer { u32 tx_flags; }; -struct iavf_rx_buffer { - dma_addr_t dma; - struct page *page; - __u32 page_offset; -}; - struct iavf_queue_stats { u64 packets; u64 bytes; @@ -251,13 +235,18 @@ struct iavf_rx_queue_stats { struct iavf_ring { struct iavf_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + union { + struct page_pool *pp; /* Used on Rx for buffer management */ + struct device *dev; /* Used on Tx for DMA mapping */ + }; struct net_device *netdev; /* netdev ring maps to */ union { + struct libeth_fqe *rx_fqes; struct iavf_tx_buffer *tx_bi; - struct iavf_rx_buffer *rx_bi; }; u8 __iomem *tail; + u32 truesize; + u16 queue_index; /* Queue number of ring */ /* high bit set 
means dynamic, use accessors routines to read/write. @@ -305,6 +294,8 @@ struct iavf_ring { * iavf_clean_rx_ring_irq() is called * for this ring. */ + + u32 rx_buf_len; } ____cacheline_internodealigned_in_smp; #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 @@ -328,13 +319,6 @@ struct iavf_ring_container { #define iavf_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) -{ - return 0; -} - -#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring)) - bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index f8e9f859a4f1..1e543f6a7c30 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include + #include "iavf.h" #include "iavf_prototype.h" @@ -268,13 +270,13 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) void iavf_configure_queues(struct iavf_adapter *adapter) { struct virtchnl_vsi_queue_config_info *vqci; - int i, max_frame = adapter->vf_res->max_mtu; int pairs = adapter->num_active_queues; struct virtchnl_queue_pair_info *vqpi; + u32 i, max_frame; size_t len; - if (max_frame > IAVF_MAX_RXBUFFER || !max_frame) - max_frame = IAVF_MAX_RXBUFFER; + max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset); + max_frame = min_not_zero(adapter->vf_res->max_mtu, max_frame); if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -304,7 +306,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) vqpi->rxq.ring_len = adapter->rx_rings[i].count; vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = IAVF_RXBUFFER_3072; + vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; if (CRC_OFFLOAD_ALLOWED(adapter)) vqpi->rxq.crc_disable = !!(adapter->netdev->features & NETIF_F_RXFCS); diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h index 37c5c8f26fb9..8e97775f1d66 100644 --- a/include/linux/net/intel/libie/rx.h +++ b/include/linux/net/intel/libie/rx.h @@ -6,6 +6,23 @@ #include +/* Rx buffer management */ + +/* The largest size for a single descriptor as per HW */ +#define LIBIE_MAX_RX_BUF_LEN 9728U +/* "True" HW-writeable space: minimum from SW and HW values */ +#define LIBIE_RX_BUF_LEN(hr) min_t(u32, LIBETH_RX_PAGE_LEN(hr), \ + LIBIE_MAX_RX_BUF_LEN) + +/* The maximum frame size as per HW (S/G) */ +#define __LIBIE_MAX_RX_FRM_LEN 16382U +/* ATST, HW can chain up to 5 Rx descriptors */ +#define LIBIE_MAX_RX_FRM_LEN(hr) \ + min_t(u32, __LIBIE_MAX_RX_FRM_LEN, LIBIE_RX_BUF_LEN(hr) * 5) +/* Maximum frame size minus LL overhead */ +#define LIBIE_MAX_MTU \ + (LIBIE_MAX_RX_FRM_LEN(LIBETH_MAX_HEADROOM) - LIBETH_RX_LL_LEN) + /* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed * bitfield struct. 
*/ -- cgit v1.2.3 From 5bd8ebe4693c21851ef8a05343d43aa2d005dd53 Mon Sep 17 00:00:00 2001 From: Chintan Vankar Date: Mon, 22 Apr 2024 18:15:15 +0530 Subject: net: ethernet: ti: am65-cpsw-nuss: Enable SGMII mode for J784S4 CPSW9G TI's J784S4 SoC supports SGMII mode with CPSW9G instance of the CPSW Ethernet Switch. Thus, enable it by adding SGMII mode to the extra_modes member of the "j784s4_cpswxg_pdata" SoC data. Reviewed-by: Roger Quadros Signed-off-by: Chintan Vankar Link: https://lore.kernel.org/r/20240422124515.887511-1-c-vankar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 81a0fe1f1045..67b3a94e5efa 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -3390,7 +3390,8 @@ static const struct am65_cpsw_pdata j784s4_cpswxg_pdata = { .quirks = 0, .ale_dev_id = "am64-cpswxg", .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, - .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_USXGMII), + .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII) | + BIT(PHY_INTERFACE_MODE_USXGMII), }; static const struct of_device_id am65_cpsw_nuss_of_mtable[] = { -- cgit v1.2.3 From 3833e4834d70440582c127443073fde3204d5c07 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:26:23 +0000 Subject: bnxt_en: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. 
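In driver terms the pattern is small; a minimal sketch, assuming only <net/flow_offload.h> and the same helper as the patch below (foo_parse_flower() and its surroundings are illustrative, not taken from any driver):

static int foo_parse_flower(struct flow_cls_offload *cls)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
	struct netlink_ext_ack *extack = cls->common.extack;

	/* Reject e.g. `tc flower ... ip_flags frag`: the helper checks the
	 * control-flags mask and fills in the extack message itself.
	 */
	if (flow_rule_match_has_control_flags(rule, extack))
		return -EOPNOTSUPP;

	/* continue dissecting the keys this hardware does support */
	return 0;
}

The helper does both the mask check and the error reporting, so callers only have to propagate -EOPNOTSUPP.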
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Reviewed-by: Sriharsha Basavapatna Tested-by: Sriharsha Basavapatna Link: https://lore.kernel.org/r/20240422152626.175569-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 273c9ba48f09..d2ca90407cce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -370,6 +370,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, struct bnxt_tc_flow *flow) { struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd); + struct netlink_ext_ack *extack = tc_flow_cmd->common.extack; struct flow_dissector *dissector = rule->match.dissector; /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ @@ -380,6 +381,9 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; -- cgit v1.2.3 From 8ae124f1897fcae824d2b041e102914957c5ab38 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:26:42 +0000 Subject: net: ethernet: ti: am65-cpsw: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240422152643.175592-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 816e73a3d6e4..16f192a5b160 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -1008,6 +1008,9 @@ static int am65_cpsw_qos_clsflower_add_policer(struct am65_cpsw_port *port, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address"); return -EOPNOTSUPP; -- cgit v1.2.3 From f97e0a5eac159a637a8d36fcac3eb60253306ece Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:26:55 +0000 Subject: net: ethernet: ti: cpsw: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240422152656.175627-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw_priv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 764ed298b570..6fe4edabba44 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1404,6 +1404,9 @@ static int cpsw_qos_clsflower_add_policer(struct cpsw_priv *priv, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address"); return -EOPNOTSUPP; -- cgit v1.2.3 From e199a5b29f199b1fb790d09f1cc12d5bc564fe37 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:27:16 +0000 Subject: net: hns3: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Also propagate extack to hclge_get_cls_key_ip(), and convert it to return error code. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Tested-by: Jijie Shao Link: https://lore.kernel.org/r/20240422152717.175659-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 28336cf17170..85b5d2331fc9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7222,8 +7222,9 @@ static void hclge_get_cls_key_vlan(const struct flow_rule *flow, } } -static void hclge_get_cls_key_ip(const struct flow_rule *flow, - struct hclge_fd_rule *rule) +static int hclge_get_cls_key_ip(const struct flow_rule *flow, + struct hclge_fd_rule *rule, + struct netlink_ext_ack *extack) { u16 addr_type = 0; @@ -7232,6 +7233,9 @@ static void hclge_get_cls_key_ip(const struct flow_rule *flow, flow_rule_match_control(flow, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, extack)) + return -EOPNOTSUPP; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { @@ -7260,6 +7264,8 @@ static void hclge_get_cls_key_ip(const struct flow_rule *flow, rule->unused_tuple |= BIT(INNER_SRC_IP); rule->unused_tuple |= BIT(INNER_DST_IP); } + + return 0; } static void hclge_get_cls_key_port(const struct flow_rule *flow, @@ -7285,7 +7291,9 @@ static int hclge_parse_cls_flower(struct hclge_dev *hdev, struct hclge_fd_rule *rule) { struct flow_rule *flow = flow_cls_offload_flow_rule(cls_flower); + struct netlink_ext_ack *extack = cls_flower->common.extack; struct flow_dissector *dissector = flow->match.dissector; + int ret; if (dissector->used_keys & ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | @@ -7303,7 +7311,11 @@ static int hclge_parse_cls_flower(struct hclge_dev *hdev, hclge_get_cls_key_basic(flow, rule); 
hclge_get_cls_key_mac(flow, rule); hclge_get_cls_key_vlan(flow, rule); - hclge_get_cls_key_ip(flow, rule); + + ret = hclge_get_cls_key_ip(flow, rule, extack); + if (ret) + return ret; + hclge_get_cls_key_port(flow, rule); return 0; -- cgit v1.2.3 From 3c3adb22510cf1c9bb4c43e3f74650a8ff2d85d7 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:27:34 +0000 Subject: octeontx2-pf: flower: check for unsupported control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use flow_rule_is_supp_control_flags() to reject filters with unsupported control flags. In case any unsupported control flags are masked, flow_rule_is_supp_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Remove FLOW_DIS_FIRST_FRAG specific error message, and treat it as any other unsupported control flag. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jacob Keller Reviewed-by: Sunil Goutham Link: https://lore.kernel.org/r/20240422152735.175693-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 6d4ce2ece8d0..e63cc1eb6d89 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -700,10 +700,6 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, u32 val; flow_rule_match_control(rule, &match); - if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { - NL_SET_ERR_MSG_MOD(extack, "HW doesn't support frag first/later"); - return -EOPNOTSUPP; - } if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { val = match.key->flags & FLOW_DIS_IS_FRAGMENT; @@ -721,6 +717,10 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, return -EOPNOTSUPP; } } + + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT, + match.mask->flags, extack)) + return -EOPNOTSUPP; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { -- cgit v1.2.3 From 369dac68d22ef6052160b3cd383f2d04b1810f84 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Mon, 22 Apr 2024 20:53:05 -0700 Subject: enic: Replace hardcoded values for vnic descriptor by defines Replace the hardcoded values used in the calculations for vnic descriptors and rings with defines. Minor code cleanup. Signed-off-by: Satish Kharat Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/vnic_dev.c | 20 ++++++++------------ drivers/net/ethernet/cisco/enic/vnic_dev.h | 5 +++++ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 12a83fa1302d..9f6089e81608 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -146,23 +146,19 @@ EXPORT_SYMBOL(vnic_dev_get_res); static unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, unsigned int desc_count, unsigned int desc_size) { - /* The base address of the desc rings must be 512 byte aligned. - * Descriptor count is aligned to groups of 32 descriptors. A - * count of 0 means the maximum 4096 descriptors. Descriptor - * size is aligned to 16 bytes. 
- */ - - unsigned int count_align = 32; - unsigned int desc_align = 16; - ring->base_align = 512; + /* Descriptor ring base address alignment in bytes*/ + ring->base_align = VNIC_DESC_BASE_ALIGN; + /* A count of 0 means the maximum descriptors */ if (desc_count == 0) - desc_count = 4096; + desc_count = VNIC_DESC_MAX_COUNT; - ring->desc_count = ALIGN(desc_count, count_align); + /* Descriptor count aligned in groups of VNIC_DESC_COUNT_ALIGN descriptors */ + ring->desc_count = ALIGN(desc_count, VNIC_DESC_COUNT_ALIGN); - ring->desc_size = ALIGN(desc_size, desc_align); + /* Descriptor size alignment in bytes */ + ring->desc_size = ALIGN(desc_size, VNIC_DESC_SIZE_ALIGN); ring->size = ring->desc_count * ring->desc_size; ring->size_unaligned = ring->size + ring->base_align; diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h index 6273794b923b..7fdd8c661c99 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.h +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h @@ -31,6 +31,11 @@ static inline void writeq(u64 val, void __iomem *reg) #undef pr_fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define VNIC_DESC_SIZE_ALIGN 16 +#define VNIC_DESC_COUNT_ALIGN 32 +#define VNIC_DESC_BASE_ALIGN 512 +#define VNIC_DESC_MAX_COUNT 4096 + enum vnic_dev_intr_mode { VNIC_DEV_INTR_MODE_UNKNOWN, VNIC_DEV_INTR_MODE_INTX, -- cgit v1.2.3 From 632c9550b9993edb44c4f5a127f1298f99ef13f4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Apr 2024 16:13:23 +0100 Subject: net: lan743x: Correct spelling in comments Correct spelling in comments, as flagged by codespell. Signed-off-by: Simon Horman Reviewed-by: Daniel Machon Link: https://lore.kernel.org/r/20240424-lan743x-confirm-v2-1-f0480542e39f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 4 ++-- drivers/net/ethernet/microchip/lan743x_ptp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index d37a49cd5c69..cee47729d022 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -803,7 +803,7 @@ static int lan743x_mdiobus_read_c22(struct mii_bus *bus, int phy_id, int index) u32 val, mii_access; int ret; - /* comfirm MII not busy */ + /* confirm MII not busy */ ret = lan743x_mac_mii_wait_till_not_busy(adapter); if (ret < 0) return ret; @@ -868,7 +868,7 @@ static int lan743x_mdiobus_read_c45(struct mii_bus *bus, int phy_id, u32 mmd_access; int ret; - /* comfirm MII not busy */ + /* confirm MII not busy */ ret = lan743x_mac_mii_wait_till_not_busy(adapter); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 2801f08bf1c9..80d9680b3830 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -555,7 +555,7 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on, if (half == wf_high) { /* It's 50% match. 
Use the toggle option */ pulse_width = PTP_GENERAL_CONFIG_CLOCK_EVENT_TOGGLE_; - /* In this case, devide period value by 2 */ + /* In this case, divide period value by 2 */ ts_period = ns_to_timespec64(div_s64(period64, 2)); period_sec = ts_period.tv_sec; period_nsec = ts_period.tv_nsec; -- cgit v1.2.3 From 896e47f5f481999c23ffda228c947937407c2c9a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Apr 2024 16:13:24 +0100 Subject: net: lan966x: Correct spelling in comments Correct spelling in comments, as flagged by codespell. Signed-off-by: Simon Horman Reviewed-by: Daniel Machon Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240424-lan743x-confirm-v2-2-f0480542e39f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_ifh.h | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 4 ++-- drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ifh.h b/drivers/net/ethernet/microchip/lan966x/lan966x_ifh.h index f3b1e0d31826..e706163ce9cc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ifh.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ifh.h @@ -78,7 +78,7 @@ /* Classified internal priority for queuing */ #define IFH_POS_QOS_CLASS 100 -/* Bit mask with eight cpu copy classses */ +/* Bit mask with eight cpu copy classes */ #define IFH_POS_CPUQ 92 /* Relearn + learn flags (*) */ diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 2635ef8958c8..b7e75da65834 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -276,7 +276,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, ++i; } - /* Inidcate EOF and valid bytes in the last word */ + /* Indicate EOF and valid bytes in the last word */ lan_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) | QS_INJ_CTRL_VLD_BYTES_SET(skb->len < LAN966X_BUFFER_MIN_SZ ? 0 : last) | @@ -520,7 +520,7 @@ bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb) u32 val; /* The IGMP and MLD frames are not forward by the HW if - * multicast snooping is enabled, therefor don't mark as + * multicast snooping is enabled, therefore don't mark as * offload to allow the SW to forward the frames accordingly. 
*/ val = lan_rd(lan966x, ANA_CPU_FWD_CFG(port)); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index caa9e0533c96..f8bebbcf77b2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -326,7 +326,7 @@ struct lan966x { u8 base_mac[ETH_ALEN]; - spinlock_t tx_lock; /* lock for frame transmition */ + spinlock_t tx_lock; /* lock for frame transmission */ struct net_device *bridge; u16 bridge_mask; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 2e83bbb9477e..fdfa4040d9ee 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -88,7 +88,7 @@ static void lan966x_port_link_down(struct lan966x_port *port) SYS_FRONT_PORT_MODE_HDX_MODE, lan966x, SYS_FRONT_PORT_MODE(port->chip_port)); - /* 8: Flush the queues accociated with the port */ + /* 8: Flush the queues associated with the port */ lan_rmw(QSYS_SW_PORT_MODE_AGING_MODE_SET(3), QSYS_SW_PORT_MODE_AGING_MODE, lan966x, QSYS_SW_PORT_MODE(port->chip_port)); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c index 3c44660128da..fa34a739c748 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -157,7 +157,7 @@ void lan966x_vlan_port_apply(struct lan966x_port *port) pvid = lan966x_vlan_port_get_pvid(port); - /* Ingress clasification (ANA_PORT_VLAN_CFG) */ + /* Ingress classification (ANA_PORT_VLAN_CFG) */ /* Default vlan to classify for untagged frames (may be zero) */ val = ANA_VLAN_CFG_VLAN_VID_SET(pvid); if (port->vlan_aware) -- cgit v1.2.3 From 49c6e0a859f7ae2fbf8b93ba7b0151ef14302427 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Apr 2024 16:13:25 +0100 Subject: net: encx24j600: Correct spelling in comments Correct spelling in comments, as flagged by codespell. 
Signed-off-by: Simon Horman Reviewed-by: Daniel Machon Link: https://lore.kernel.org/r/20240424-lan743x-confirm-v2-3-f0480542e39f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 4 ++-- drivers/net/ethernet/microchip/encx24j600.c | 6 ++++-- drivers/net/ethernet/microchip/encx24j600_hw.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 443128adbcb6..3885d6fbace1 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -75,7 +75,7 @@ static int regmap_encx24j600_sfr_read(void *context, u8 reg, u8 *val, if (unlikely(ret)) return ret; } else { - /* Translate registers that are more effecient using + /* Translate registers that are more efficient using * 3-byte SPI commands */ switch (reg) { @@ -129,7 +129,7 @@ static int regmap_encx24j600_sfr_update(struct encx24j600_context *ctx, if (unlikely(ret)) return ret; } else { - /* Translate registers that are more effecient using + /* Translate registers that are more efficient using * 3-byte SPI commands */ switch (reg) { diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index cdc2872ace1b..b011bf5c2305 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -569,7 +569,7 @@ static void encx24j600_dump_config(struct encx24j600_priv *priv, pr_info(DRV_NAME " MABBIPG: %04X\n", encx24j600_read_reg(priv, MABBIPG)); - /* PHY configuation */ + /* PHY configuration */ pr_info(DRV_NAME " PHCON1: %04X\n", encx24j600_read_phy(priv, PHCON1)); pr_info(DRV_NAME " PHCON2: %04X\n", encx24j600_read_phy(priv, PHCON2)); pr_info(DRV_NAME " PHANA: %04X\n", encx24j600_read_phy(priv, PHANA)); @@ -837,7 +837,9 @@ static void encx24j600_hw_tx(struct encx24j600_priv *priv) dump_packet("TX", priv->tx_skb->len, priv->tx_skb->data); if (encx24j600_read_reg(priv, EIR) & TXABTIF) - /* Last transmition aborted due to error. Reset TX interface */ + /* Last transmission aborted due to error. + * Reset TX interface + */ encx24j600_reset_hw_tx(priv); /* Clear the TXIF flag if were previously set */ diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h index 34c5a289898c..2522f4f48b67 100644 --- a/drivers/net/ethernet/microchip/encx24j600_hw.h +++ b/drivers/net/ethernet/microchip/encx24j600_hw.h @@ -243,7 +243,7 @@ int devm_regmap_init_encx24j600(struct device *dev, /* MAIPG */ /* value of the high byte is given by the reserved bits, - * value of the low byte is recomended setting of the + * value of the low byte is recommended setting of the * IPG parameter. */ #define MAIPGH_VAL 0x0C -- cgit v1.2.3 From d896a374378a2df373e855111434bbe2b3c521da Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Apr 2024 16:13:26 +0100 Subject: net: sparx5: Correct spelling in comments Correct spelling in comments, as flagged by codespell. 
Signed-off-by: Simon Horman Reviewed-by: Daniel Machon Link: https://lore.kernel.org/r/20240424-lan743x-confirm-v2-4-f0480542e39f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c | 2 +- drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 +- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 2 +- drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c | 2 +- drivers/net/ethernet/microchip/vcap/vcap_ag_api.h | 2 +- drivers/net/ethernet/microchip/vcap/vcap_api.c | 4 ++-- drivers/net/ethernet/microchip/vcap/vcap_api_client.h | 2 +- drivers/net/ethernet/microchip/vcap/vcap_api_private.h | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 141897dfe388..1915998f6079 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -143,7 +143,7 @@ static void sparx5_fdma_rx_activate(struct sparx5 *sparx5, struct sparx5_rx *rx) static void sparx5_fdma_rx_deactivate(struct sparx5 *sparx5, struct sparx5_rx *rx) { - /* Dectivate the RX channel */ + /* Deactivate the RX channel */ spx5_rmw(0, BIT(rx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE, sparx5, FDMA_CH_ACTIVATE); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index ac7e1cffbcec..f3f5fb420468 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -67,7 +67,7 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) for (i = 0; i < IFH_LEN; i++) ifh[i] = spx5_rd(sparx5, QS_XTR_RD(grp)); - /* Decode IFH (whats needed) */ + /* Decode IFH (what's needed) */ sparx5_ifh_parse(ifh, &fi); /* Map to port netdev */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 60dd2fd603a8..062e486c002c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -370,7 +370,7 @@ static int sparx5_port_disable(struct sparx5 *sparx5, struct sparx5_port *port, /* 6: Wait while the last frame is exiting the queues */ usleep_range(8 * spd_prm, 10 * spd_prm); - /* 7: Flush the queues accociated with the port->portno */ + /* 7: Flush the queues associated with the port->portno */ spx5_rmw(HSCH_FLUSH_CTRL_FLUSH_PORT_SET(port->portno) | HSCH_FLUSH_CTRL_FLUSH_DST_SET(1) | HSCH_FLUSH_CTRL_FLUSH_SRC_SET(1) | diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index 4af85d108a06..0b4abc3eb53d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -190,7 +190,7 @@ static int sparx5_port_bridge_join(struct sparx5_port *port, /* Remove standalone port entry */ sparx5_mact_forget(sparx5, ndev->dev_addr, 0); - /* Port enters in bridge mode therefor don't need to copy to CPU + /* Port enters in bridge mode therefore don't need to copy to CPU * frames for multicast in case the bridge is not requesting them */ __dev_mc_unsync(ndev, sparx5_mc_unsync); diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h index c3569a4c7b69..4735fad05708 100644 --- 
a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h @@ -290,7 +290,7 @@ enum vcap_keyfield_set { * Sparx5: TCP flag RST , LAN966x: TCP: TCP flag RST. PTP over UDP: messageType * bit 3 * VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2/es2, lan966x: is2 - * Set if TCP sequence number is 0, LAN966x: Overlayed with PTP over UDP: + * Set if TCP sequence number is 0, LAN966x: Overlaid with PTP over UDP: * messageType bit 0 * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is1/is2 * TCP/UDP source port diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index 80ae5e1708a6..2687765abe52 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -327,7 +327,7 @@ static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl, } /* Verify that the typegroup information, subword count, keyset and type id - * are in sync and correct, return the list of matchin keysets + * are in sync and correct, return the list of matching keysets */ int vcap_find_keystream_keysets(struct vcap_control *vctrl, @@ -2943,7 +2943,7 @@ void vcap_netbytes_copy(u8 *dst, u8 *src, int count) } EXPORT_SYMBOL_GPL(vcap_netbytes_copy); -/* Convert validation error code into tc extact error message */ +/* Convert validation error code into tc extack error message */ void vcap_set_tc_exterr(struct flow_cls_offload *fco, struct vcap_rule *vrule) { switch (vrule->exterr) { diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h index 56874f2adbba..cdf79e17ca54 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h @@ -238,7 +238,7 @@ const struct vcap_set *vcap_keyfieldset(struct vcap_control *vctrl, /* Copy to host byte order */ void vcap_netbytes_copy(u8 *dst, u8 *src, int count); -/* Convert validation error code into tc extact error message */ +/* Convert validation error code into tc extack error message */ void vcap_set_tc_exterr(struct flow_cls_offload *fco, struct vcap_rule *vrule); /* Cleanup a VCAP instance */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h index df81d9ff502b..844bdf6b5f45 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h @@ -109,7 +109,7 @@ int vcap_addr_keysets(struct vcap_control *vctrl, struct net_device *ndev, struct vcap_keyset_list *kslist); /* Verify that the typegroup information, subword count, keyset and type id - * are in sync and correct, return the list of matchin keysets + * are in sync and correct, return the list of matching keysets */ int vcap_find_keystream_keysets(struct vcap_control *vctrl, enum vcap_type vt, u32 *keystream, u32 *mskstream, bool mask, -- cgit v1.2.3 From bcf303c62c98eb41e242bba8764f5804d9d4658b Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:16:22 +0000 Subject: net: sparx5: flower: only do lookup if fragment flags are set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fragment lookup should only be performed, when at least one of the fragment flags are set. 
This change was deliberately not included in commit 68aba00483c7 ("net: sparx5: flower: fix fragment flags handling") as it's only needed for future-proofing the code, since "mask" is currently only set in conjunction with the fragment flags. (The 3rd flag FLOW_DIS_ENCAPSULATION is only used with "key".) Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Daniel Machon Tested-by: Daniel Machon Link: https://lore.kernel.org/r/20240424121632.459022-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index 663571fe7b2d..ca9e69d39439 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -165,7 +165,7 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) flow_rule_match_control(st->frule, &mt); - if (mt.mask->flags) { + if (mt.mask->flags & (FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) { u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT); u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT); u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask; -- cgit v1.2.3 From 8cd1b6c0bf3170cd90bbe691546333d9a29c3419 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:16:23 +0000 Subject: net: sparx5: flower: add extack to sparx5_tc_flower_handler_control_usage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define extack locally, to reduce line lengths and aid future users. Only compile-tested.
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Daniel Machon Tested-by: Daniel Machon Link: https://lore.kernel.org/r/20240424121632.459022-3-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index ca9e69d39439..d0dca65f8e76 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -159,6 +159,7 @@ out: static int sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) { + struct netlink_ext_ack *extack = st->fco->common.extack; struct flow_match_control mt; u32 value, mask; int err = 0; @@ -178,7 +179,7 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx]; if (vdt == FRAG_INVAL) { - NL_SET_ERR_MSG_MOD(st->fco->common.extack, + NL_SET_ERR_MSG_MOD(extack, "Match on invalid fragment flag combination"); return -EINVAL; } @@ -199,7 +200,7 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) return err; out: - NL_SET_ERR_MSG_MOD(st->fco->common.extack, "ip_frag parse error"); + NL_SET_ERR_MSG_MOD(extack, "ip_frag parse error"); return err; } -- cgit v1.2.3 From b92eb1ac13f069f188b733709afabc26049bf6bf Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:16:24 +0000 Subject: net: sparx5: flower: remove goto in sparx5_tc_flower_handler_control_usage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove goto, as it's only used once, and the error message is specific to that context. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Daniel Machon Tested-by: Daniel Machon Link: https://lore.kernel.org/r/20240424121632.459022-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index d0dca65f8e76..22f6c778afb0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -191,17 +191,15 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_L3_FRAGMENT_TYPE, value, mask); - if (err) - goto out; + if (err) { + NL_SET_ERR_MSG_MOD(extack, "ip_frag parse error"); + return err; + } } st->used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); return err; - -out: - NL_SET_ERR_MSG_MOD(extack, "ip_frag parse error"); - return err; } static int -- cgit v1.2.3 From 8ef631e9c995d397dafd1565b82e9ab933fa25a6 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:16:25 +0000 Subject: net: sparx5: flower: check for unsupported control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use flow_rule_is_supp_control_flags() to reject filters with unsupported control flags. In case any unsupported control flags are masked, flow_rule_is_supp_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. 
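For hardware that can match some control flags (here the two fragment flags), the series switches to flow_rule_is_supp_control_flags(); a minimal sketch under the same assumptions, with bar_parse_control() as an illustrative stand-in for the driver parser, and the caller assumed to have already checked flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL):

static int bar_parse_control(struct flow_rule *rule,
			     struct netlink_ext_ack *extack)
{
	struct flow_match_control match;

	flow_rule_match_control(rule, &match);

	/* Anything masked outside the two fragment flags is rejected; the
	 * helper sets the extack message when it returns false.
	 */
	if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT |
					     FLOW_DIS_FIRST_FRAG,
					     match.mask->flags, extack))
		return -EOPNOTSUPP;

	/* translate the fragment bits into device-specific keys here */
	return 0;
}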
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Daniel Machon Tested-by: Daniel Machon Link: https://lore.kernel.org/r/20240424121632.459022-5-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index 22f6c778afb0..8d67d9f24c76 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -197,6 +197,11 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) } } + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT | + FLOW_DIS_FIRST_FRAG, + mt.mask->flags, extack)) + return -EOPNOTSUPP; + st->used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); return err; -- cgit v1.2.3 From 505ccf890c21430f9ca70921debc3312aa813541 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:53:38 +0000 Subject: net: lan966x: flower: add extack to lan966x_tc_flower_handler_control_usage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define extack locally, to reduce line lengths and aid future users. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240424125347.461995-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index d696cf9dbd19..a63b83fa2823 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -45,6 +45,7 @@ static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, static int lan966x_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) { + struct netlink_ext_ack *extack = st->fco->common.extack; struct flow_match_control match; int err = 0; @@ -80,7 +81,7 @@ lan966x_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) return err; out: - NL_SET_ERR_MSG_MOD(st->fco->common.extack, "ip_frag parse error"); + NL_SET_ERR_MSG_MOD(extack, "ip_frag parse error"); return err; } -- cgit v1.2.3 From 12b8e129c40929551b2ce4035ca91c3c841863a2 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:53:39 +0000 Subject: net: lan966x: flower: rename goto in lan966x_tc_flower_handler_control_usage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename goto label, as the error message is specific to the fragment flags. Only compile-tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240424125347.461995-3-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index a63b83fa2823..8baec0cd8d95 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -60,7 +60,7 @@ lan966x_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) VCAP_KF_L3_FRAGMENT, VCAP_BIT_0); if (err) - goto out; + goto bad_frag_out; } if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { @@ -73,14 +73,14 @@ lan966x_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) VCAP_KF_L3_FRAG_OFS_GT0, VCAP_BIT_1); if (err) - goto out; + goto bad_frag_out; } st->used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); return err; -out: +bad_frag_out: NL_SET_ERR_MSG_MOD(extack, "ip_frag parse error"); return err; } -- cgit v1.2.3 From 8c65e27b42fc9a3646bc58ada1cb26eab043e515 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 24 Apr 2024 12:53:40 +0000 Subject: net: lan966x: flower: check for unsupported control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use flow_rule_is_supp_control_flags() to reject filters with unsupported control flags. In case any unsupported control flags are masked, flow_rule_is_supp_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jiri Pirko Reviewed-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240424125347.461995-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 8baec0cd8d95..43913d6204e1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -76,6 +76,11 @@ lan966x_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st) goto bad_frag_out; } + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT | + FLOW_DIS_FIRST_FRAG, + match.mask->flags, extack)) + return -EOPNOTSUPP; + st->used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL); return err; -- cgit v1.2.3 From 15fd021bc4270273d8f4b7f58fdda8a16214a377 Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Wed, 24 Apr 2024 14:02:54 -0700 Subject: igc: Add Tx hardware timestamp request for AF_XDP zero-copy packet This patch adds support to per-packet Tx hardware timestamp request to AF_XDP zero-copy packet via XDP Tx metadata framework. Please note that user needs to enable Tx HW timestamp capability via igc_ioctl() with SIOCSHWTSTAMP cmd before sending xsk Tx hardware timestamp request. Same as implementation in RX timestamp XDP hints kfunc metadata, Timer 0 (adjustable clock) is used in xsk Tx hardware timestamp. i225/i226 have four sets of timestamping registers. *skb and *xsk_tx_buffer pointers are used to indicate whether the timestamping register is already occupied. 
Furthermore, a boolean variable named xsk_pending_ts is used to hold the transmit completion until the tx hardware timestamp is ready. This is because, for i225/i226, the timestamp notification event comes some time after the transmit completion event. The driver will retrigger hardware irq to clean the packet after retrieve the tx hardware timestamp. Besides, xsk_meta is added into struct igc_tx_timestamp_request as a hook to the metadata location of the transmit packet. When the Tx timestamp interrupt is fired, the interrupt handler will copy the value of Tx hwts into metadata location via xsk_tx_metadata_complete(). This patch is tested with tools/testing/selftests/bpf/xdp_hw_metadata on Intel ADL-S platform. Below are the test steps and results. Test Step 1: Run xdp_hw_metadata app ./xdp_hw_metadata > /dev/shm/result.log Test Step 2: Enable Tx hardware timestamp hwstamp_ctl -i -t 1 -r 1 Test Step 3: Run ptp4l and phc2sys for time synchronization Test Step 4: Generate UDP packets with 1ms interval for 10s trafgen --dev '{eth(da=), udp(dp=9091)}' -t 1ms -n 10000 Test Step 5: Rerun Step 1-3 with 10s iperf3 as background traffic Test Step 6: Rerun Step 1-4 with 10s iperf3 as background traffic Based on iperf3 results below, the impact of holding tx completion to throughput is not observable. Result of last UDP packet (no. 10000) in Step 4: poll: 1 (0) skip=99 fail=0 redir=10000 xsk_ring_cons__peek: 1 0x5640a37972d0: rx_desc[9999]->addr=f2110 addr=f2110 comp_addr=f2110 EoP rx_hash: 0x2049BE1D with RSS type:0x1 HW RX-time: 1679819246792971268 (sec:1679819246.7930) delta to User RX-time sec:0.0000 (14.990 usec) XDP RX-time: 1679819246792981987 (sec:1679819246.7930) delta to User RX-time sec:0.0000 (4.271 usec) No rx_vlan_tci or rx_vlan_proto, err=-95 0x5640a37972d0: ping-pong with csum=ab19 (want 315b) csum_start=34 csum_offset=6 0x5640a37972d0: complete tx idx=9999 addr=f010 HW TX-complete-time: 1679819246793036971 (sec:1679819246.7930) delta to User TX-complete-time sec:0.0001 (77.656 usec) XDP RX-time: 1679819246792981987 (sec:1679819246.7930) delta to User TX-complete-time sec:0.0001 (132.640 usec) HW RX-time: 1679819246792971268 (sec:1679819246.7930) delta to HW TX-complete-time sec:0.0001 (65.703 usec) 0x5640a37972d0: complete rx idx=10127 addr=f2110 Result of iperf3 without tx hwts request in step 5: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 2.74 GBytes 2.36 Gbits/sec 0 sender [ 5] 0.00-10.05 sec 2.74 GBytes 2.34 Gbits/sec receiver Result of iperf3 running parallel with trafgen command in step 6: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 2.74 GBytes 2.36 Gbits/sec 0 sender [ 5] 0.00-10.04 sec 2.74 GBytes 2.34 Gbits/sec receiver Co-developed-by: Lai Peter Jun Ann Signed-off-by: Lai Peter Jun Ann Signed-off-by: Song Yoong Siang Acked-by: John Fastabend Acked-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240424210256.3440903-1-anthony.l.nguyen@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/igc/igc.h | 71 ++++++++++++------- drivers/net/ethernet/intel/igc/igc_main.c | 113 ++++++++++++++++++++++++++++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 51 ++++++++++++-- 3 files changed, 195 insertions(+), 40 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 6bc56c7c181e..8b14c029eda1 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -72,13 +72,46 @@ 
struct igc_rx_packet_stats { u64 other_packets; }; +enum igc_tx_buffer_type { + IGC_TX_BUFFER_TYPE_SKB, + IGC_TX_BUFFER_TYPE_XDP, + IGC_TX_BUFFER_TYPE_XSK, +}; + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer + */ +struct igc_tx_buffer { + union igc_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + enum igc_tx_buffer_type type; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; + bool xsk_pending_ts; +}; + struct igc_tx_timestamp_request { - struct sk_buff *skb; /* reference to the packet being timestamped */ + union { /* reference to the packet being timestamped */ + struct sk_buff *skb; + struct igc_tx_buffer *xsk_tx_buffer; + }; + enum igc_tx_buffer_type buffer_type; unsigned long start; /* when the tstamp request started (jiffies) */ u32 mask; /* _TSYNCTXCTL_TXTT_{X} bit for this request */ u32 regl; /* which TXSTMPL_{X} register should be used */ u32 regh; /* which TXSTMPH_{X} register should be used */ u32 flags; /* flags that should be added to the tx_buffer */ + u8 xsk_queue_index; /* Tx queue which requesting timestamp */ + struct xsk_tx_metadata_compl xsk_meta; /* ref to xsk Tx metadata */ }; struct igc_inline_rx_tstamps { @@ -323,6 +356,9 @@ void igc_disable_tx_ring(struct igc_ring *ring); void igc_enable_tx_ring(struct igc_ring *ring); int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); +/* AF_XDP TX metadata operations */ +extern const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops; + /* igc_dump declarations */ void igc_rings_dump(struct igc_adapter *adapter); void igc_regs_dump(struct igc_adapter *adapter); @@ -508,32 +544,6 @@ enum igc_boards { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -enum igc_tx_buffer_type { - IGC_TX_BUFFER_TYPE_SKB, - IGC_TX_BUFFER_TYPE_XDP, - IGC_TX_BUFFER_TYPE_XSK, -}; - -/* wrapper around a pointer to a socket buffer, - * so a DMA handle can be stored along with the buffer - */ -struct igc_tx_buffer { - union igc_adv_tx_desc *next_to_watch; - unsigned long time_stamp; - enum igc_tx_buffer_type type; - union { - struct sk_buff *skb; - struct xdp_frame *xdpf; - }; - unsigned int bytecount; - u16 gso_segs; - __be16 protocol; - - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); - u32 tx_flags; -}; - struct igc_rx_buffer { union { struct { @@ -557,6 +567,13 @@ struct igc_xdp_buff { struct igc_inline_rx_tstamps *rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ }; +struct igc_metadata_request { + struct igc_tx_buffer *tx_buffer; + struct xsk_tx_metadata *meta; + struct igc_ring *tx_ring; + u32 cmd_type; +}; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ void __iomem *itr_register; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b529fb59f9d5..34c257a51ed1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2873,6 +2873,89 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector, q_vector->tx.total_packets += packets; } +static void igc_xsk_request_timestamp(void *_priv) +{ + struct igc_metadata_request *meta_req = _priv; + struct igc_ring *tx_ring = meta_req->tx_ring; + struct igc_tx_timestamp_request *tstamp; + u32 tx_flags = IGC_TX_FLAGS_TSTAMP; + struct igc_adapter *adapter; + unsigned long lock_flags; + 
bool found = false; + int i; + + if (test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags)) { + adapter = netdev_priv(tx_ring->netdev); + + spin_lock_irqsave(&adapter->ptp_tx_lock, lock_flags); + + /* Search for available tstamp regs */ + for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { + tstamp = &adapter->tx_tstamp[i]; + + /* tstamp->skb and tstamp->xsk_tx_buffer are in union. + * When tstamp->skb is equal to NULL, + * tstamp->xsk_tx_buffer is equal to NULL as well. + * This condition means that the particular tstamp reg + * is not occupied by other packet. + */ + if (!tstamp->skb) { + found = true; + break; + } + } + + /* Return if no available tstamp regs */ + if (!found) { + adapter->tx_hwtstamp_skipped++; + spin_unlock_irqrestore(&adapter->ptp_tx_lock, + lock_flags); + return; + } + + tstamp->start = jiffies; + tstamp->xsk_queue_index = tx_ring->queue_index; + tstamp->xsk_tx_buffer = meta_req->tx_buffer; + tstamp->buffer_type = IGC_TX_BUFFER_TYPE_XSK; + + /* Hold the transmit completion until timestamp is ready */ + meta_req->tx_buffer->xsk_pending_ts = true; + + /* Keep the pointer to tx_timestamp, which is located in XDP + * metadata area. It is the location to store the value of + * tx hardware timestamp. + */ + xsk_tx_metadata_to_compl(meta_req->meta, &tstamp->xsk_meta); + + /* Set timestamp bit based on the _TSTAMP(_X) bit. */ + tx_flags |= tstamp->flags; + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP, + (IGC_ADVTXD_MAC_TSTAMP)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_1, + (IGC_ADVTXD_TSTAMP_REG_1)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_2, + (IGC_ADVTXD_TSTAMP_REG_2)); + meta_req->cmd_type |= IGC_SET_FLAG(tx_flags, + IGC_TX_FLAGS_TSTAMP_3, + (IGC_ADVTXD_TSTAMP_REG_3)); + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, lock_flags); + } +} + +static u64 igc_xsk_fill_timestamp(void *_priv) +{ + return *(u64 *)_priv; +} + +const struct xsk_tx_metadata_ops igc_xsk_tx_metadata_ops = { + .tmo_request_timestamp = igc_xsk_request_timestamp, + .tmo_fill_timestamp = igc_xsk_fill_timestamp, +}; + static void igc_xdp_xmit_zc(struct igc_ring *ring) { struct xsk_buff_pool *pool = ring->xsk_pool; @@ -2894,24 +2977,34 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { - u32 cmd_type, olinfo_status; + struct igc_metadata_request meta_req; + struct xsk_tx_metadata *meta = NULL; struct igc_tx_buffer *bi; + u32 olinfo_status; dma_addr_t dma; - cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | - IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | - xdp_desc.len; + meta_req.cmd_type = IGC_ADVTXD_DTYP_DATA | + IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | + IGC_TXD_DCMD | xdp_desc.len; olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + meta = xsk_buff_get_metadata(pool, xdp_desc.addr); xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); + bi = &ring->tx_buffer_info[ntu]; + + meta_req.tx_ring = ring; + meta_req.tx_buffer = bi; + meta_req.meta = meta; + xsk_tx_metadata_request(meta, &igc_xsk_tx_metadata_ops, + &meta_req); tx_desc = IGC_TX_DESC(ring, ntu); - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + tx_desc->read.cmd_type_len = cpu_to_le32(meta_req.cmd_type); tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); tx_desc->read.buffer_addr = cpu_to_le64(dma); - bi = &ring->tx_buffer_info[ntu]; bi->type = IGC_TX_BUFFER_TYPE_XSK; bi->protocol = 0; bi->bytecount = 
xdp_desc.len; @@ -2974,6 +3067,13 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) break; + /* Hold the completions while there's a pending tx hardware + * timestamp request from XDP Tx metadata. + */ + if (tx_buffer->type == IGC_TX_BUFFER_TYPE_XSK && + tx_buffer->xsk_pending_ts) + break; + /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; @@ -6802,6 +6902,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->netdev_ops = &igc_netdev_ops; netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; + netdev->xsk_tx_metadata_ops = &igc_xsk_tx_metadata_ops; igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 885faaa7b9de..1bb026232efc 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -11,6 +11,7 @@ #include #include #include +#include #define INCVALUE_MASK 0x7fffffff #define ISGN 0x80000000 @@ -545,6 +546,30 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_free_tx_buffer(struct igc_adapter *adapter, + struct igc_tx_timestamp_request *tstamp) +{ + if (tstamp->buffer_type == IGC_TX_BUFFER_TYPE_XSK) { + /* Release the transmit completion */ + tstamp->xsk_tx_buffer->xsk_pending_ts = false; + + /* Note: tstamp->skb and tstamp->xsk_tx_buffer are in union. + * By setting tstamp->xsk_tx_buffer to NULL, tstamp->skb will + * become NULL as well. + */ + tstamp->xsk_tx_buffer = NULL; + tstamp->buffer_type = 0; + + /* Trigger txrx interrupt for transmit completion */ + igc_xsk_wakeup(adapter->netdev, tstamp->xsk_queue_index, 0); + + return; + } + + dev_kfree_skb_any(tstamp->skb); + tstamp->skb = NULL; +} + static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) { unsigned long flags; @@ -555,8 +580,8 @@ static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) { struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i]; - dev_kfree_skb_any(tstamp->skb); - tstamp->skb = NULL; + if (tstamp->skb) + igc_ptp_free_tx_buffer(adapter, tstamp); } spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); @@ -657,8 +682,9 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, static void igc_ptp_tx_timeout(struct igc_adapter *adapter, struct igc_tx_timestamp_request *tstamp) { - dev_kfree_skb_any(tstamp->skb); - tstamp->skb = NULL; + if (tstamp->skb) + igc_ptp_free_tx_buffer(adapter, tstamp); + adapter->tx_hwtstamp_timeouts++; netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -729,10 +755,21 @@ static void igc_ptp_tx_reg_to_stamp(struct igc_adapter *adapter, shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - tstamp->skb = NULL; + /* Copy the tx hardware timestamp into xdp metadata or skb */ + if (tstamp->buffer_type == IGC_TX_BUFFER_TYPE_XSK) { + struct xsk_buff_pool *xsk_pool; + + xsk_pool = adapter->tx_ring[tstamp->xsk_queue_index]->xsk_pool; + if (xsk_pool && xp_tx_metadata_enabled(xsk_pool)) { + xsk_tx_metadata_complete(&tstamp->xsk_meta, + &igc_xsk_tx_metadata_ops, + &shhwtstamps.hwtstamp); + } + } else { + skb_tstamp_tx(skb, &shhwtstamps); + } - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + igc_ptp_free_tx_buffer(adapter, tstamp); } /** -- cgit v1.2.3 From d63394abc923093423c141d4049b72aa403fff07 Mon Sep 17 00:00:00 2001 From: Tanmay Patil 
Date: Thu, 25 Apr 2024 16:01:42 +0530 Subject: net: ethernet: ti: am65-cpsw-qos: Add support to taprio for past base_time If the base-time for taprio is in the past, start the schedule at the time of the form "base_time + N*cycle_time" where N is the smallest possible integer such that the above time is in the future. Signed-off-by: Tanmay Patil Signed-off-by: Chintan Vankar Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 16f192a5b160..fa96db7c1a13 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -837,6 +838,7 @@ static int am65_cpsw_taprio_replace(struct net_device *ndev, struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct am65_cpts *cpts = common->cpts; struct am65_cpsw_est *est_new; + u64 cur_time, n; int ret, tact; if (!netif_running(ndev)) { @@ -888,13 +890,21 @@ static int am65_cpsw_taprio_replace(struct net_device *ndev, if (tact == TACT_PROG) am65_cpsw_timer_stop(ndev); - if (!est_new->taprio.base_time) - est_new->taprio.base_time = am65_cpts_ns_gettime(cpts); - am65_cpsw_port_est_get_buf_num(ndev, est_new); am65_cpsw_est_set_sched_list(ndev, est_new); am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); + /* If the base-time is in the past, start schedule from the time: + * base_time + (N*cycle_time) + * where N is the smallest possible integer such that the above + * time is in the future. + */ + cur_time = am65_cpts_ns_gettime(cpts); + if (est_new->taprio.base_time < cur_time) { + n = div64_u64(cur_time - est_new->taprio.base_time, est_new->taprio.cycle_time); + est_new->taprio.base_time += (n + 1) * est_new->taprio.cycle_time; + } + am65_cpsw_est_set(ndev, 1); if (tact == TACT_PROG) { -- cgit v1.2.3 From e28d8aba4381a7b056baef2e8c1422a72dcde0b5 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 26 Apr 2024 14:42:22 +0200 Subject: mlxsw: pci: Handle up to 64 Rx completions in tasklet We can get many completions in one interrupt. Currently, the CQ tasklet handles completions up to half of the queue size, and then arms the hardware to generate additional events, which means that if there were additional completions that we did not handle, we will immediately get an additional interrupt to handle the rest. The decision to handle up to half of the queue size is arbitrary and was determined in 2015, when the mlxsw driver was added to the kernel. One additional fact that should be taken into account is that while WQEs from the RDQ are handled, the CPU that handles the tasklet is dedicated to this task, which means that we might hold the CPU for a long time. Handle WQEs in smaller chunks, then arm the CQ doorbell to notify the hardware to send additional notifications. Set the chunk size to 64, as this is the number recommended when using NAPI, and the driver will use NAPI in a subsequent patch. Note that for now we use the arm doorbell to retrigger the CQ tasklet, but with NAPI it will be more efficient, as software will reschedule the poll method and we will not involve hardware for that. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 13fd067c39ed..8668947400ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -652,12 +652,13 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) return elem; } +#define MLXSW_PCI_CQ_MAX_HANDLE 64 + static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) { struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); struct mlxsw_pci_queue *rdq = q->cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; - int credits = q->count >> 1; int items = 0; char *cqe; @@ -683,7 +684,7 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, wqe_counter, q->cq.v, ncqe); - if (++items == credits) + if (++items == MLXSW_PCI_CQ_MAX_HANDLE) break; } -- cgit v1.2.3 From 6b3d015cdb2aee8361e061387d70fdc8f4dd0b9a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 26 Apr 2024 14:42:23 +0200 Subject: mlxsw: pci: Ring RDQ and CQ doorbells once per several completions Currently, for each CQE in the CQ, we ring the CQ doorbell, then handle the RDQ and ring the RDQ doorbell. Finally we ring the CQ arm doorbell - once per CQ tasklet. The idea of ringing the CQ doorbell before the RDQ doorbell is to be sure that when we post a new WQE (after the RDQ is handled), there is an available CQE. This was done because of a hardware bug as part of commit c9ebea04cb1b ("mlxsw: pci: Ring CQ's doorbell before RDQ's"). There is no real reason to ring the RDQ and CQ doorbells for each completion; it is better to handle several completions and reduce the number of doorbell rings, as access to hardware is expensive (time wise) and might take time because of memory barriers. A previous patch changed the CQ tasklet to handle up to 64 Rx packets. With this limitation, we can ring the CQ and RDQ doorbells once per CQ tasklet. The counters of the doorbells are increased by the amount of packets that we handled, so the device will know for which completion to send an additional event. To avoid reordering the CQ and RDQ doorbell rings, let the tasklet also ring the RDQ doorbell; mlxsw_pci_cqe_rdq_handle() handles the counter but does not ring the doorbell. Note that with this change there is no need to copy the CQE, as we ring the CQ doorbell only after Rx packet processing (which uses the CQE) is done. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 8668947400ab..2094b802d8d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -630,9 +630,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); out: - /* Everything is set up, ring doorbell to pass elem to HW */ q->producer_counter++; - mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; } @@ -666,7 +664,6 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); - char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; if (unlikely(sendq)) { WARN_ON_ONCE(1); @@ -678,16 +675,15 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) continue; } - memcpy(ncqe, cqe, q->elem_size); - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); - mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->cq.v, ncqe); + wqe_counter, q->cq.v, cqe); if (++items == MLXSW_PCI_CQ_MAX_HANDLE) break; } + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, rdq); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); } -- cgit v1.2.3 From 5d01ed2e970812a5e1947ef2ce421a2fce68c45b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 26 Apr 2024 14:42:24 +0200 Subject: mlxsw: pci: Initialize dummy net devices for NAPI mlxsw will use NAPI for event processing in a subsequent patch. As preparation, add two dummy net devices and initialize them. A NAPI instance should be attached to a net device. Usually each queue is used by a single net device in network drivers, so the mapping between net device and NAPI instance is intuitive. In our case, Rx queues are not per port, they are per trap-group. Tx queues are mapped to net devices, but we do not have a separate queue for each local port; several ports share the same queue. Use alloc_netdev_dummy() to allocate and initialize dummy net devices for NAPI. To run the NAPI poll method in a kernel thread, the net device which the NAPI instance is attached to should be marked as 'threaded'. It is recommended to handle Tx packets in softIRQ context, as usually this is a short task - just free the Tx packet which has been transmitted. Rx packet handling is a more complicated task, so drivers can use a dedicated kernel thread to process them. It allows processing packets from different Rx queues in parallel. We would like to handle only Rx packets in kernel threads, which means that we will use two dummy net devices (one for Rx and one for Tx). Set 'threaded' on only one of them, as it will be used for Rx processing. Do not fail if setting 'threaded' fails, as it is better to use regular softIRQ NAPI rather than preventing the driver from loading. Note that the net devices are allocated with alloc_netdev_dummy(), so they are not registered, which means that they will not be visible to the user. It will not be possible to change the 'threaded' configuration from user space, but this is reasonable in our case, as there is no other configuration which makes sense, considering that the user has no influence on the usage of each queue.
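For reference, the intended arrangement is roughly the following (an illustrative sketch only, not the exact driver code; the precise calls are in the diff below, and the NAPI instances are only attached to these net devices in a later patch):

| /* Rx completions: threaded NAPI backed by a dummy net device */
| napi_dev_rx = alloc_netdev_dummy(0);
| strscpy(napi_dev_rx->name, "mlxsw_rx", sizeof(napi_dev_rx->name));
| dev_set_threaded(napi_dev_rx, true);
|
| /* Tx completions: softIRQ NAPI backed by a second dummy net device */
| napi_dev_tx = alloc_netdev_dummy(0);
| strscpy(napi_dev_tx->name, "mlxsw_tx", sizeof(napi_dev_tx->name));
|
| /* later, per Rx CQ ("mlxsw: pci: Use NAPI for event processing"): */
| netif_napi_add(napi_dev_rx, &q->u.cq.napi, mlxsw_pci_napi_poll_cq_rx);
| napi_enable(&q->u.cq.napi);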
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 2094b802d8d5..ec54b876dfd9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -127,8 +127,42 @@ struct mlxsw_pci { u8 num_cqs; /* Number of CQs */ u8 num_sdqs; /* Number of SDQs */ bool skip_reset; + struct net_device *napi_dev_tx; + struct net_device *napi_dev_rx; }; +static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci) +{ + int err; + + mlxsw_pci->napi_dev_tx = alloc_netdev_dummy(0); + if (!mlxsw_pci->napi_dev_tx) + return -ENOMEM; + strscpy(mlxsw_pci->napi_dev_tx->name, "mlxsw_tx", + sizeof(mlxsw_pci->napi_dev_tx->name)); + + mlxsw_pci->napi_dev_rx = alloc_netdev_dummy(0); + if (!mlxsw_pci->napi_dev_rx) { + err = -ENOMEM; + goto err_alloc_rx; + } + strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx", + sizeof(mlxsw_pci->napi_dev_rx->name)); + dev_set_threaded(mlxsw_pci->napi_dev_rx, true); + + return 0; + +err_alloc_rx: + free_netdev(mlxsw_pci->napi_dev_tx); + return err; +} + +static void mlxsw_pci_napi_devs_fini(struct mlxsw_pci *mlxsw_pci) +{ + free_netdev(mlxsw_pci->napi_dev_rx); + free_netdev(mlxsw_pci->napi_dev_tx); +} + static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) { tasklet_schedule(&q->tasklet); @@ -1721,6 +1755,10 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_requery_resources; + err = mlxsw_pci_napi_devs_init(mlxsw_pci); + if (err) + goto err_napi_devs_init; + err = mlxsw_pci_aqs_init(mlxsw_pci, mbox); if (err) goto err_aqs_init; @@ -1738,6 +1776,8 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, err_request_eq_irq: mlxsw_pci_aqs_fini(mlxsw_pci); err_aqs_init: + mlxsw_pci_napi_devs_fini(mlxsw_pci); +err_napi_devs_init: err_requery_resources: err_config_profile: err_cqe_v_check: @@ -1765,6 +1805,7 @@ static void mlxsw_pci_fini(void *bus_priv) free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci); mlxsw_pci_aqs_fini(mlxsw_pci); + mlxsw_pci_napi_devs_fini(mlxsw_pci); mlxsw_pci_fw_area_fini(mlxsw_pci); mlxsw_pci_free_irq_vectors(mlxsw_pci); } -- cgit v1.2.3 From c0d9267873bc0960f65ea43cca72d63dbe34202f Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 26 Apr 2024 14:42:25 +0200 Subject: mlxsw: pci: Reorganize 'mlxsw_pci_queue' structure The next patch will set the driver to use NAPI for event processing. Then tasklet mechanism will be used only for EQ. Reorganize 'mlxsw_pci_queue' to hold EQ and CQ attributes in a union. For now, add tasklet for both EQ and CQ. This will be changed in the next patch, as 'tasklet_struct' will be replaced with NAPI instance. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 76 +++++++++++++++---------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index ec54b876dfd9..7724f9a61479 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -82,12 +82,17 @@ struct mlxsw_pci_queue { u8 num; /* queue number */ u8 elem_size; /* size of one element */ enum mlxsw_pci_queue_type type; - struct tasklet_struct tasklet; /* queue processing tasklet */ struct mlxsw_pci *pci; - struct { - enum mlxsw_pci_cqe_v v; - struct mlxsw_pci_queue *dq; - } cq; + union { + struct { + enum mlxsw_pci_cqe_v v; + struct mlxsw_pci_queue *dq; + struct tasklet_struct tasklet; + } cq; + struct { + struct tasklet_struct tasklet; + } eq; + } u; }; struct mlxsw_pci_queue_type_group { @@ -163,11 +168,6 @@ static void mlxsw_pci_napi_devs_fini(struct mlxsw_pci *mlxsw_pci) free_netdev(mlxsw_pci->napi_dev_tx); } -static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) -{ - tasklet_schedule(&q->tasklet); -} - static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, size_t elem_size, int elem_index) { @@ -324,7 +324,7 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, return err; cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num); - cq->cq.dq = q; + cq->u.cq.dq = q; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return 0; } @@ -433,7 +433,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, return err; cq = mlxsw_pci_cq_get(mlxsw_pci, cq_num); - cq->cq.dq = q; + cq->u.cq.dq = q; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); @@ -455,7 +455,7 @@ rollback: elem_info = mlxsw_pci_queue_elem_info_get(q, i); mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info); } - cq->cq.dq = NULL; + cq->u.cq.dq = NULL; mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num); return err; @@ -477,12 +477,12 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q) { - q->cq.v = mlxsw_pci->max_cqe_ver; + q->u.cq.v = mlxsw_pci->max_cqe_ver; - if (q->cq.v == MLXSW_PCI_CQE_V2 && + if (q->u.cq.v == MLXSW_PCI_CQE_V2 && q->num < mlxsw_pci->num_sdqs && !mlxsw_core_sdq_supports_cqe_v2(mlxsw_pci->core)) - q->cq.v = MLXSW_PCI_CQE_V1; + q->u.cq.v = MLXSW_PCI_CQE_V1; } static unsigned int mlxsw_pci_read32_off(struct mlxsw_pci *mlxsw_pci, @@ -676,7 +676,7 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); elem = elem_info->elem; - owner_bit = mlxsw_pci_cqe_owner_get(q->cq.v, elem); + owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem); if (mlxsw_pci_elem_hw_owned(q, owner_bit)) return NULL; q->consumer_counter++; @@ -688,16 +688,16 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); - struct mlxsw_pci_queue *rdq = q->cq.dq; + struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet); + struct mlxsw_pci_queue *rdq = q->u.cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; int items = 0; char *cqe; while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); - u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); - u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); + u8 
dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); if (unlikely(sendq)) { WARN_ON_ONCE(1); @@ -710,7 +710,7 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) } mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->cq.v, cqe); + wqe_counter, q->u.cq.v, cqe); if (++items == MLXSW_PCI_CQ_MAX_HANDLE) break; @@ -723,8 +723,8 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) { - struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); - struct mlxsw_pci_queue *sdq = q->cq.dq; + struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet); + struct mlxsw_pci_queue *sdq = q->u.cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; int credits = q->count >> 1; int items = 0; @@ -732,8 +732,8 @@ static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); - u8 sendq = mlxsw_pci_cqe_sr_get(q->cq.v, cqe); - u8 dqn = mlxsw_pci_cqe_dqn_get(q->cq.v, cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; if (unlikely(!sendq)) { @@ -750,7 +750,7 @@ static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, q->cq.v, ncqe); + wqe_counter, q->u.cq.v, ncqe); if (++items == credits) break; @@ -777,10 +777,10 @@ static void mlxsw_pci_cq_tasklet_setup(struct mlxsw_pci_queue *q, { switch (cq_type) { case MLXSW_PCI_CQ_SDQ: - tasklet_setup(&q->tasklet, mlxsw_pci_cq_tx_tasklet); + tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_tx_tasklet); break; case MLXSW_PCI_CQ_RDQ: - tasklet_setup(&q->tasklet, mlxsw_pci_cq_rx_tasklet); + tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_rx_tasklet); break; } } @@ -796,13 +796,13 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, for (i = 0; i < q->count; i++) { char *elem = mlxsw_pci_queue_elem_get(q, i); - mlxsw_pci_cqe_owner_set(q->cq.v, elem, 1); + mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1); } - if (q->cq.v == MLXSW_PCI_CQE_V1) + if (q->u.cq.v == MLXSW_PCI_CQE_V1) mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1); - else if (q->cq.v == MLXSW_PCI_CQE_V2) + else if (q->u.cq.v == MLXSW_PCI_CQE_V2) mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox, MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2); @@ -831,13 +831,13 @@ static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) { - return q->cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : + return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT : MLXSW_PCI_CQE01_COUNT; } static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q) { - return q->cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE : + return q->u.cq.v == MLXSW_PCI_CQE_V2 ? 
MLXSW_PCI_CQE2_SIZE : MLXSW_PCI_CQE01_SIZE; } @@ -860,7 +860,7 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q) static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) { unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)]; - struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet); + struct mlxsw_pci_queue *q = from_tasklet(q, t, u.eq.tasklet); struct mlxsw_pci *mlxsw_pci = q->pci; int credits = q->count >> 1; u8 cqn, cq_count; @@ -886,7 +886,7 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) cq_count = mlxsw_pci->num_cqs; for_each_set_bit(cqn, active_cqns, cq_count) { q = mlxsw_pci_cq_get(mlxsw_pci, cqn); - mlxsw_pci_queue_tasklet_schedule(q); + tasklet_schedule(&q->u.cq.tasklet); } } @@ -922,7 +922,7 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num); if (err) return err; - tasklet_setup(&q->tasklet, mlxsw_pci_eq_tasklet); + tasklet_setup(&q->u.eq.tasklet, mlxsw_pci_eq_tasklet); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); return 0; @@ -1483,7 +1483,7 @@ static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id) struct mlxsw_pci_queue *q; q = mlxsw_pci_eq_get(mlxsw_pci); - mlxsw_pci_queue_tasklet_schedule(q); + tasklet_schedule(&q->u.eq.tasklet); return IRQ_HANDLED; } -- cgit v1.2.3 From 3b0b3019dbea1a110ff01896e00fe30804bf78bc Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 26 Apr 2024 14:42:26 +0200 Subject: mlxsw: pci: Use NAPI for event processing Spectrum ASICs only support a single interrupt, that means that all the events are handled by one IRQ (interrupt request) handler. Once an interrupt is received, we schedule tasklet to handle events from EQ and then schedule tasklets to handle completions from CQs. Tasklet runs in softIRQ (software IRQ) context, and will be run on the same CPU which scheduled it. That means that today we use only one CPU to handle all the packets (both network packets and EMADs) from hardware. This can be improved using NAPI. The idea is to use NAPI instance per CQ, which is mapped 1:1 to DQ (RDQ or SDQ). NAPI poll method can be run in kernel thread, so then the driver will be able to handle WQEs in several CPUs. Convert the existing code to use NAPI APIs. Add NAPI instance as part of 'struct mlxsw_pci_queue' and initialize it as part of CQs initialization. Set the appropriate poll method and dummy net device, according to queue number, similar to tasklet setup. For CQs which are used for completions of RDQ, use Rx poll method and 'napi_dev_rx', which is set as 'threaded'. It means that Rx poll method will run in kernel context, so several RDQs will be handled in parallel. For CQs which are used for completions of SDQ, use Tx poll method and 'napi_dev_tx', this method will run in softIRQ context, as it is recommended in NAPI documentation, as Tx packets' processing is short task. Convert mlxsw_pci_cq_{rx,tx}_tasklet() to poll methods. Handle 'budget' argument - ignore it in Tx poll method, as it is recommended to not limit Tx processing. For Rx processing, handle up to 'budget' completions. Return 'work_done' which is the amount of completions that were handled. Handle the following cases: 1. After processing 'budget' completions, the driver still has work to do: Return work-done = budget. In that case, the NAPI instance will be polled again (without the need to be rescheduled). 
Do not re-arm the queue, as NAPI will handle the reschedule, so we do not have to involve hardware to send an additional interrupt for the completions that should be processed. 2. Event processing has been completed: Call napi_complete_done() to mark NAPI processing as completed, which means that the poll method will not be rescheduled. Re-arm the queue, as all completions were handled. In case that poll method handled exactly 'budget' completions, return work-done = budget -1, to distinguish from the case that driver still has completions to handle. Otherwise, return the amount of completions that were handled. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 96 +++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7724f9a61479..bf66d996e32e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -87,7 +87,7 @@ struct mlxsw_pci_queue { struct { enum mlxsw_pci_cqe_v v; struct mlxsw_pci_queue *dq; - struct tasklet_struct tasklet; + struct napi_struct napi; } cq; struct { struct tasklet_struct tasklet; @@ -684,16 +684,29 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) return elem; } -#define MLXSW_PCI_CQ_MAX_HANDLE 64 +static bool mlxsw_pci_cq_cqe_to_handle(struct mlxsw_pci_queue *q) +{ + struct mlxsw_pci_queue_elem_info *elem_info; + bool owner_bit; -static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) + elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem_info->elem); + return !mlxsw_pci_elem_hw_owned(q, owner_bit); +} + +static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget) { - struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet); + struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue, + u.cq.napi); struct mlxsw_pci_queue *rdq = q->u.cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; - int items = 0; + int work_done = 0; char *cqe; + /* If the budget is 0, Rx processing should be skipped. */ + if (unlikely(!budget)) + return 0; + while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); @@ -712,22 +725,44 @@ static void mlxsw_pci_cq_rx_tasklet(struct tasklet_struct *t) mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, wqe_counter, q->u.cq.v, cqe); - if (++items == MLXSW_PCI_CQ_MAX_HANDLE) + if (++work_done == budget) break; } mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, rdq); - mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); + + if (work_done < budget) + goto processing_completed; + + /* The driver still has outstanding work to do, budget was exhausted. + * Return exactly budget. In that case, the NAPI instance will be polled + * again. + */ + if (mlxsw_pci_cq_cqe_to_handle(q)) + goto out; + + /* The driver processed all the completions and handled exactly + * 'budget'. Return 'budget - 1' to distinguish from the case that + * driver still has completions to handle. 
+ */ + if (work_done == budget) + work_done--; + +processing_completed: + if (napi_complete_done(napi, work_done)) + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); +out: + return work_done; } -static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) +static int mlxsw_pci_napi_poll_cq_tx(struct napi_struct *napi, int budget) { - struct mlxsw_pci_queue *q = from_tasklet(q, t, u.cq.tasklet); + struct mlxsw_pci_queue *q = container_of(napi, struct mlxsw_pci_queue, + u.cq.napi); struct mlxsw_pci_queue *sdq = q->u.cq.dq; struct mlxsw_pci *mlxsw_pci = q->pci; - int credits = q->count >> 1; - int items = 0; + int work_done = 0; char *cqe; while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) { @@ -752,11 +787,21 @@ static void mlxsw_pci_cq_tx_tasklet(struct tasklet_struct *t) mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, wqe_counter, q->u.cq.v, ncqe); - if (++items == credits) - break; + work_done++; } + /* If the budget is 0 napi_complete_done() should never be called. */ + if (unlikely(!budget)) + goto processing_completed; + + work_done = min(work_done, budget - 1); + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + +processing_completed: mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); +out: + return work_done; } static enum mlxsw_pci_cq_type @@ -772,17 +817,29 @@ mlxsw_pci_cq_type(const struct mlxsw_pci *mlxsw_pci, return MLXSW_PCI_CQ_RDQ; } -static void mlxsw_pci_cq_tasklet_setup(struct mlxsw_pci_queue *q, - enum mlxsw_pci_cq_type cq_type) +static void mlxsw_pci_cq_napi_setup(struct mlxsw_pci_queue *q, + enum mlxsw_pci_cq_type cq_type) { + struct mlxsw_pci *mlxsw_pci = q->pci; + switch (cq_type) { case MLXSW_PCI_CQ_SDQ: - tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_tx_tasklet); + netif_napi_add(mlxsw_pci->napi_dev_tx, &q->u.cq.napi, + mlxsw_pci_napi_poll_cq_tx); break; case MLXSW_PCI_CQ_RDQ: - tasklet_setup(&q->u.cq.tasklet, mlxsw_pci_cq_rx_tasklet); + netif_napi_add(mlxsw_pci->napi_dev_rx, &q->u.cq.napi, + mlxsw_pci_napi_poll_cq_rx); break; } + + napi_enable(&q->u.cq.napi); +} + +static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q) +{ + napi_disable(&q->u.cq.napi); + netif_napi_del(&q->u.cq.napi); } static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, @@ -817,7 +874,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num); if (err) return err; - mlxsw_pci_cq_tasklet_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q)); + mlxsw_pci_cq_napi_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q)); mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); return 0; @@ -826,6 +883,7 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q) { + mlxsw_pci_cq_napi_teardown(q); mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); } @@ -886,7 +944,7 @@ static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t) cq_count = mlxsw_pci->num_cqs; for_each_set_bit(cqn, active_cqns, cq_count) { q = mlxsw_pci_cq_get(mlxsw_pci, cqn); - tasklet_schedule(&q->u.cq.tasklet); + napi_schedule(&q->u.cq.napi); } } -- cgit v1.2.3 From e8dfd42c17faf183415323db1ef0c977be0d6489 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Apr 2024 15:19:52 +0000 Subject: ipv6: introduce dst_rt6_info() helper Instead of (struct rt6_info *)dst casts, we can use : #define dst_rt6_info(_ptr) \ container_of_const(_ptr, struct rt6_info, dst) Some places needed missing const 
qualifiers : ip6_confirm_neigh(), ipv6_anycast_destination(), ipv6_unicast_destination(), has_gateway() v2: added missing parts (David Ahern) Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/infiniband/core/addr.c | 6 ++--- .../net/ethernet/mellanox/mlxsw/spectrum_span.c | 2 +- drivers/net/vrf.c | 2 +- drivers/net/vxlan/vxlan_core.c | 2 +- drivers/s390/net/qeth_core.h | 4 ++-- include/net/ip6_fib.h | 6 +++-- include/net/ip6_route.h | 11 ++++----- net/bluetooth/6lowpan.c | 2 +- net/core/dst_cache.c | 2 +- net/core/filter.c | 2 +- net/ipv4/ip_tunnel.c | 2 +- net/ipv6/icmp.c | 8 +++---- net/ipv6/ila/ila_lwt.c | 4 ++-- net/ipv6/ip6_output.c | 18 +++++++------- net/ipv6/ip6mr.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/route.c | 28 +++++++++++----------- net/ipv6/tcp_ipv6.c | 4 +--- net/ipv6/udp.c | 11 ++++----- net/ipv6/xfrm6_policy.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/mpls/mpls_iptunnel.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 14 +++++------ net/netfilter/nf_flow_table_core.c | 8 ++----- net/netfilter/nf_flow_table_ip.c | 4 ++-- net/netfilter/nft_rt.c | 2 +- net/sctp/ipv6.c | 2 +- net/xfrm/xfrm_policy.c | 3 +-- 30 files changed, 77 insertions(+), 86 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f253295795f0..f20dfe70fa0e 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -348,15 +348,15 @@ static int dst_fetch_ha(const struct dst_entry *dst, static bool has_gateway(const struct dst_entry *dst, sa_family_t family) { - struct rtable *rt; - struct rt6_info *rt6; + const struct rtable *rt; + const struct rt6_info *rt6; if (family == AF_INET) { rt = container_of(dst, struct rtable, dst); return rt->rt_uses_gateway; } - rt6 = container_of(dst, struct rt6_info, dst); + rt6 = dst_rt6_info(dst); return rt6->rt6i_flags & RTF_GATEWAY; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 3de69b2eb5c4..4b5fd71c897d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -540,7 +540,7 @@ mlxsw_sp_span_gretap6_route(const struct net_device *to_dev, if (!dst || dst->error) goto out; - rt6 = container_of(dst, struct rt6_info, dst); + rt6 = dst_rt6_info(dst); dev = dst->dev; *saddrp = fl6.saddr; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 66f8542f3b18..784b9b2d275e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -653,7 +653,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, skb->dev = dev; rcu_read_lock(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 4bbfb516ac05..a9a85d0f5a5d 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2513,7 +2513,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (!info) { - u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; + u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags; err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6, dst_port, ifindex, vni, diff --git a/drivers/s390/net/qeth_core.h 
b/drivers/s390/net/qeth_core.h index 613eab729704..5f17a2a5d0e3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -956,7 +956,7 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); struct rt6_info *rt; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (dst) { if (proto == htons(ETH_P_IPV6)) dst = dst_check(dst, rt6_get_cookie(rt)); @@ -978,7 +978,7 @@ static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb, static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb, struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; + struct rt6_info *rt = dst_rt6_info(dst); if (rt && !ipv6_addr_any(&rt->rt6i_gateway)) return &rt->rt6i_gateway; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7834f7f29d3c..6cb867ce4878 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -234,9 +234,11 @@ struct fib6_result { for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) -static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) +#define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst) + +static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst) { - return ((struct rt6_info *)dst)->rt6i_idev; + return dst_rt6_info(dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a30c6aa9e5cf..a18ed24fed94 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -210,12 +210,11 @@ void rt6_uncached_list_del(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt6 = NULL; if (dst) - rt6 = container_of(dst, struct rt6_info, dst); + return dst_rt6_info(dst); - return rt6; + return NULL; } /* @@ -227,7 +226,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, { struct ipv6_pinfo *np = inet6_sk(sk); - np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); + np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES @@ -240,7 +239,7 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + const struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); return rt->rt6i_flags & RTF_LOCAL; } @@ -248,7 +247,7 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { - struct rt6_info *rt = (struct rt6_info *)dst; + const struct rt6_info *rt = dst_rt6_info(dst); return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 27520a8a486f..50cfec8ccac4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int count = atomic_read(&dev->peer_count); const struct in6_addr *nexthop; struct lowpan_peer *peer; diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 0ccfd5fa5cb9..b17171345d64 100644 
--- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -112,7 +112,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, - rt6_get_cookie((struct rt6_info *)dst)); + rt6_get_cookie(dst_rt6_info(dst))); idst->in6_saddr = *saddr; } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); diff --git a/net/core/filter.c b/net/core/filter.c index 294670d3850d..5662464e1abd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2215,7 +2215,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, rcu_read_lock(); if (!nh) { dst = skb_dst(skb); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); } else { nexthop = &nh->ipv6_nh; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 177f40c3a8e8..ba46cf7612f4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rt6_info *rt6; __be32 daddr; - rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : + rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) : NULL; daddr = md ? dst : tunnel->parms.iph.daddr; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1635da07285f..d285c1f6f1a6 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = true; } else { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; @@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, dst = ip6_route_output(net, sk, fl6); if (!dst->error) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct in6_addr prefsrc; rt6_get_prefsrc(rt, &prefsrc); @@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, - (struct rt6_info *)dst, MSG_DONTWAIT)) { + dst_rt6_info(dst), MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 8c1ce78956ba..0601bad79822 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel( static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); - struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct rt6_info *rt = dst_rt6_info(orig_dst); struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); struct dst_entry *dst; int err = -EINVAL; @@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = orig_dst->dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.daddr = *rt6_nexthop((struct rt6_info 
*)orig_dst, + fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), &ip6h->daddr); dst = ip6_route_output(net, NULL, &fl6); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e423..8f906e9fbc38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); rcu_read_lock(); - nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(IS_ERR_OR_NULL(neigh))) { @@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb) * send a redirect. */ - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) target = &rt->rt6i_gateway; else @@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; bool mono_delivery_time = skb->mono_delivery_time; @@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, return NULL; } - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, @@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; *dst = ip6_route_output(net, sk, fl6); - rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; + rt = (*dst)->error ? NULL : dst_rt6_info(*dst); rcu_read_lock(); from = rt ? 
rcu_dereference(rt->from) : NULL; @@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rt = (struct rt6_info *) *dst; + rt = dst_rt6_info(*dst); rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); @@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk, int offset = 0; bool zc = false; u32 tskey = 0; - struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct rt6_info *rt = dst_rt6_info(cork->dst); bool paged, hold_tskey, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; @@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; - struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct rt6_info *rt = dst_rt6_info(cork->base.dst); struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; @@ -1949,7 +1949,7 @@ out: int ip6_send_skb(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; err = ip6_local_out(net, skb->sk, skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cb0ee81a068a..dd342e6ecf3f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int err; struct mr_table *mrt; struct mfc6_cache *cache; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ae134634c323..d914b23256ce 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1722,7 +1722,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (IS_ERR(dst)) return; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ef2059c88955..88b3fcacd4f9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); if (IS_ERR(dst)) return PTR_ERR(dst); - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d896ca7b589..2eedf255600b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct ipv6hdr *iph; struct sk_buff *skb; int err; - struct rt6_info *rt = (struct rt6_info *)*dstp; + struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; @@ -917,7 +917,7 @@ back_from_confirm: ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, - len, 0, &ipc6, &fl6, (struct rt6_info *)dst, + len, 0, &ipc6, &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1f4b935a0e57..3e0b2cb20fd2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff 
*skb, const void *daddr) { - const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + const struct rt6_info *rt = dst_rt6_info(dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), dst->dev, skb, daddr); @@ -234,8 +234,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { + const struct rt6_info *rt = dst_rt6_info(dst); struct net_device *dev = dst->dev; - struct rt6_info *rt = (struct rt6_info *)dst; daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) @@ -354,7 +354,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct fib6_info *from; struct inet6_dev *idev; @@ -373,7 +373,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct inet6_dev *idev = rt->rt6i_idev; if (idev && idev->dev != blackhole_netdev) { @@ -1288,7 +1288,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) - return (struct rt6_info *) dst; + return dst_rt6_info(dst); dst_release(dst); @@ -2647,7 +2647,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, rcu_read_lock(); dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); /* For dst cached in uncached_list, refcnt is already taken. */ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { dst = &net->ipv6.ip6_null_entry->dst; @@ -2661,7 +2661,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; + struct rt6_info *rt, *ort = dst_rt6_info(dst_orig); struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; @@ -2744,7 +2744,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, struct fib6_info *from; struct rt6_info *rt; - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->sernum) return rt6_is_valid(rt) ? dst : NULL; @@ -2772,7 +2772,7 @@ EXPORT_INDIRECT_CALLABLE(ip6_dst_check); static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; + struct rt6_info *rt = dst_rt6_info(dst); if (rt) { if (rt->rt6i_flags & RTF_CACHE) { @@ -2796,7 +2796,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { @@ -2852,7 +2852,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, bool confirm_neigh) { const struct in6_addr *daddr, *saddr; - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. 
@@ -4174,7 +4174,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; @@ -5608,7 +5608,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; unsigned char nh_flags = 0; @@ -6111,7 +6111,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->dst.error) { err = rt->dst.error; ip6_rt_put(rt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 77958adf2e16..37201c4fb393 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -97,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 085ee236d9a1..674eadfae569 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -910,11 +910,8 @@ start_lookup: static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - if (udp_sk_rx_dst_set(sk, dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); - } + if (udp_sk_rx_dst_set(sk, dst)) + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and @@ -1585,7 +1582,7 @@ back_from_confirm: skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - (struct rt6_info *)dst, + dst_rt6_info(dst), msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) @@ -1612,7 +1609,7 @@ do_append_data: ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, fl6, (struct rt6_info *)dst, + &ipc6, fl6, dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4891012b692f..7924e08ee142 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -70,7 +70,7 @@ static int xfrm6_get_saddr(struct net *net, int oif, static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rt6_info *rt = (struct rt6_info *)xdst->route; + struct rt6_info *rt = dst_rt6_info(xdst->route); xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7bf14cf9ffaa..8780ec64f376 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ back_from_confirm: ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? 
transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, - &fl6, (struct rt6_info *)dst, + &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 8fc790f2a01b..606349c8df0e 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb) ttl = net->mpls.default_ttl; else ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); } else { goto drop; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 39b5fd6bbf65..6e8b9d100ad2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (addr_type & IPV6_ADDR_LOOPBACK); old_rt_is_local = __ip_vs_is_local_route6( - (struct rt6_info *)skb_dst(skb)); + dst_rt6_info(skb_dst(skb))); } else #endif { @@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) - rt = (struct rt6_info *) dest_dst->dst_cache; + rt = dst_rt6_info(dest_dst->dst_cache); else { u32 cookie; @@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); spin_unlock_bh(&dest->dst_lock); @@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, rt_mode); if (!dst) goto err_unreach; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); } local = __ip_vs_is_local_route6(rt); @@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_RDR); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); tdev = rt->dst.dev; /* @@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a0571339239c..5c1ff07eaee0 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc); static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) { - const struct rt6_info *rt; - - if (flow_tuple->l3proto == NFPROTO_IPV6) { - rt = (const struct rt6_info *)flow_tuple->dst_cache; - return rt6_get_cookie(rt); - } + if (flow_tuple->l3proto == NFPROTO_IPV6) + return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); return 0; } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 
5383bed3d3e0..100887beed31 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = skb->dev->ifindex; IP6CB(skb)->flags = IP6SKB_FORWARDED; @@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 24d977138572..2434c624aafd 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -80,7 +80,7 @@ void nft_rt_get_eval(const struct nft_expr *expr, if (nft_pf(pkt) != NFPROTO_IPV6) goto err; - memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + memcpy(dest, rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr), sizeof(struct in6_addr)); break; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 24368f755ab1..f7b809c0d142 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -415,7 +415,7 @@ out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6affe5cd85d8..1a41650d9d7b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2598,8 +2598,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); + path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } -- cgit v1.2.3 From 45f54a9106265c3bbcf3fa9ebe510cbb10e933da Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 Apr 2024 17:17:53 +0100 Subject: net: mvpp2: use phylink_pcs_change() to report PCS link change events Use phylink_pcs_change() when reporting changes in PCS link state to phylink as the interrupts are informing us about changes to the PCS state. 
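For readers less familiar with phylink, the shape of this conversion is roughly as follows. This is only an illustrative sketch, not code from the patch: the example_* names, the register-read helper and the embedded pcs member are invented for the example; the real mvpp2 change is in the diff below.

#include <linux/interrupt.h>
#include <linux/phylink.h>

struct example_port {
	struct phylink_pcs pcs;		/* assumed: PCS instance embedded in the port */
	void __iomem *base;
};

/* assumed helper: reads the PCS link status bit for this port */
static bool example_read_pcs_link_status(struct example_port *port);

/* Sketch: a link-change ISR that reports the event against the PCS
 * instance rather than the MAC.
 */
static irqreturn_t example_link_irq_handler(int irq, void *data)
{
	struct example_port *port = data;
	bool link_up;

	/* the interrupt signals an in-band (PCS) link state change */
	link_up = example_read_pcs_link_status(port);

	/* report it to phylink via the PCS object, not phylink_mac_change(),
	 * so the event is attributed to the layer that generated it
	 */
	phylink_pcs_change(&port->pcs, link_up);

	return IRQ_HANDLED;
}
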
Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/E1s0OGn-009hgf-G6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 23adf53c2aa1..19253a0fb4fe 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3434,12 +3434,13 @@ static void mvpp2_isr_handle_ptp(struct mvpp2_port *port) mvpp2_isr_handle_ptp_queue(port, 1); } -static void mvpp2_isr_handle_link(struct mvpp2_port *port, bool link) +static void mvpp2_isr_handle_link(struct mvpp2_port *port, + struct phylink_pcs *pcs, bool link) { struct net_device *dev = port->dev; if (port->phylink) { - phylink_mac_change(port->phylink, link); + phylink_pcs_change(pcs, link); return; } @@ -3472,7 +3473,7 @@ static void mvpp2_isr_handle_xlg(struct mvpp2_port *port) if (val & MVPP22_XLG_INT_STAT_LINK) { val = readl(port->base + MVPP22_XLG_STATUS); link = (val & MVPP22_XLG_STATUS_LINK_UP); - mvpp2_isr_handle_link(port, link); + mvpp2_isr_handle_link(port, &port->pcs_xlg, link); } } @@ -3488,7 +3489,7 @@ static void mvpp2_isr_handle_gmac_internal(struct mvpp2_port *port) if (val & MVPP22_GMAC_INT_STAT_LINK) { val = readl(port->base + MVPP2_GMAC_STATUS0); link = (val & MVPP2_GMAC_STATUS0_LINK_UP); - mvpp2_isr_handle_link(port, link); + mvpp2_isr_handle_link(port, &port->pcs_gmac, link); } } } -- cgit v1.2.3 From 21c8e45acbdbe62404b3120e4bd7975de8e38172 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 Apr 2024 17:17:58 +0100 Subject: net: mvneta: use phylink_pcs_change() to report PCS link change events Use phylink_pcs_change() when reporting changes in PCS link state to phylink as the interrupts are informing us about changes to the PCS state. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/E1s0OGs-009hgl-Jg@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 40a5f1431e4e..26bf5d47ba02 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3259,7 +3259,8 @@ static void mvneta_link_change(struct mvneta_port *pp) { u32 gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS); - phylink_mac_change(pp->phylink, !!(gmac_stat & MVNETA_GMAC_LINK_UP)); + phylink_pcs_change(&pp->phylink_pcs, + !!(gmac_stat & MVNETA_GMAC_LINK_UP)); } /* NAPI handler -- cgit v1.2.3 From e47e5e85da3abfc68b3e2f574285234b3fff6fc0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 Apr 2024 17:18:03 +0100 Subject: net: prestera: use phylink_pcs_change() to report PCS link change events Use phylink_pcs_change() when reporting changes in PCS link state to phylink as the interrupts are informing us about changes to the PCS state. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/E1s0OGx-009hgr-NP@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 4fb886c57cd7..ba6d53ac7f55 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -821,7 +821,7 @@ static void prestera_port_handle_event(struct prestera_switch *sw, if (port->state_mac.oper) { if (port->phy_link) - phylink_mac_change(port->phy_link, true); + phylink_pcs_change(&port->phylink_pcs, true); else netif_carrier_on(port->dev); @@ -829,7 +829,7 @@ static void prestera_port_handle_event(struct prestera_switch *sw, queue_delayed_work(prestera_wq, caching_dw, 0); } else { if (port->phy_link) - phylink_mac_change(port->phy_link, false); + phylink_pcs_change(&port->phylink_pcs, false); else if (netif_running(port->dev) && netif_carrier_ok(port->dev)) netif_carrier_off(port->dev); -- cgit v1.2.3 From dd1941f801bc958d2ee13f5be8b38db6b034b806 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 Apr 2024 17:18:08 +0100 Subject: net: txgbe: use phylink_pcs_change() to report PCS link change events Use phylink_pcs_change() when reporting changes in PCS link state to phylink as the interrupts are informing us about changes to the PCS state. Signed-off-by: Russell King (Oracle) Acked-by: Jiawen Wu Link: https://lore.kernel.org/r/E1s0OH2-009hgx-Qw@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 93295916b1d2..5f502265f0a6 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -302,7 +302,7 @@ irqreturn_t txgbe_link_irq_handler(int irq, void *data) status = rd32(wx, TXGBE_CFG_PORT_ST); up = !!(status & TXGBE_CFG_PORT_ST_LINK_UP); - phylink_mac_change(wx->phylink, up); + phylink_pcs_change(&txgbe->xpcs->pcs, up); return IRQ_HANDLED; } -- cgit v1.2.3 From b92379dc94c1ee52aa92a64bd4487c1a81db7254 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:02 +0200 Subject: i40e: Remove flags field from i40e_veb The field is initialized always to zero and it is never read. Remove it. 
Reviewed-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Reviewed-by: Kalesh AP Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 3 +-- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 13 +++++-------- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2fbabcdb5bb5..5248e78f7849 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -788,7 +788,6 @@ struct i40e_veb { u16 stats_idx; /* index of VEB parent */ u8 enabled_tc; u16 bridge_mode; /* Bridge Mode (VEB/VEPA) */ - u16 flags; u16 bw_limit; u8 bw_max_quanta; bool is_abs_credits; @@ -1213,7 +1212,7 @@ void i40e_vsi_stop_rings(struct i40e_vsi *vsi); void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi); int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi); int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count); -struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, +struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc); void i40e_veb_release(struct i40e_veb *veb); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index f9ba45f596c9..6147c5f128e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -867,7 +867,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } - veb = i40e_veb_setup(pf, 0, uplink_seid, vsi_seid, enabled_tc); + veb = i40e_veb_setup(pf, uplink_seid, vsi_seid, enabled_tc); if (veb) dev_info(&pf->pdev->dev, "added relay %d\n", veb->seid); else diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bcc99a6676f9..e27b2aa544b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13127,7 +13127,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, /* Insert a new HW bridge */ if (!veb) { - veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid, + veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { veb->bridge_mode = mode; @@ -14383,10 +14383,10 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, } if (vsi->uplink_seid == pf->mac_seid) - veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid, + veb = i40e_veb_setup(pf, pf->mac_seid, vsi->seid, vsi->tc_config.enabled_tc); else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) - veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid, + veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) { @@ -14780,7 +14780,6 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) /** * i40e_veb_setup - Set up a VEB * @pf: board private structure - * @flags: VEB setup flags * @uplink_seid: the switch element to link to * @vsi_seid: the initial VSI seid * @enabled_tc: Enabled TC bit-map @@ -14793,9 +14792,8 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) * Returns pointer to the successfully allocated VEB sw struct on * success, otherwise returns NULL on failure. 
**/ -struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, - u16 uplink_seid, u16 vsi_seid, - u8 enabled_tc) +struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 uplink_seid, + u16 vsi_seid, u8 enabled_tc) { struct i40e_vsi *vsi = NULL; struct i40e_veb *veb; @@ -14826,7 +14824,6 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, if (veb_idx < 0) goto err_alloc; veb = pf->veb[veb_idx]; - veb->flags = flags; veb->uplink_seid = uplink_seid; veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1); -- cgit v1.2.3 From 54c4664e48eea52f2b296c73ddb8f5629b958678 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:03 +0200 Subject: i40e: Refactor argument of several client notification functions Commit 0ef2d5afb12d ("i40e: KISS the client interface") simplified the client interface so in practice it supports only one client per i40e netdev. But we have still 2 notification functions that uses as parameter a pointer to VSI of netdevice associated with the client. After the mentioned commit only possible and used VSI is the main (LAN) VSI. So refactor these functions so they are called with PF pointer argument and the associated VSI (LAN) is taken inside them. Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_client.c | 20 ++++++++++---------- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++++------- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 5248e78f7849..0792c7324527 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1236,8 +1236,8 @@ static inline void i40e_dbg_exit(void) {} int i40e_lan_add_device(struct i40e_pf *pf); int i40e_lan_del_device(struct i40e_pf *pf); void i40e_client_subtask(struct i40e_pf *pf); -void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi); -void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset); +void i40e_notify_client_of_l2_param_changes(struct i40e_pf *pf); +void i40e_notify_client_of_netdev_close(struct i40e_pf *pf, bool reset); void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs); void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id); void i40e_client_update_msix_info(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index b32071ee84af..93e52138826e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -101,25 +101,26 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len) /** * i40e_notify_client_of_l2_param_changes - call the client notify callback - * @vsi: the VSI with l2 param changes + * @pf: PF device pointer * - * If there is a client to this VSI, call the client + * If there is a client, call its callback **/ -void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) +void i40e_notify_client_of_l2_param_changes(struct i40e_pf *pf) { - struct i40e_pf *pf = vsi->back; struct i40e_client_instance *cdev = pf->cinst; + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_params params; if (!cdev || !cdev->client) return; if (!cdev->client->ops || !cdev->client->ops->l2_param_change) { - dev_dbg(&vsi->back->pdev->dev, + 
dev_dbg(&pf->pdev->dev, "Cannot locate client instance l2_param_change routine\n"); return; } if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { - dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); + dev_dbg(&pf->pdev->dev, + "Client is not open, abort l2 param change\n"); return; } memset(¶ms, 0, sizeof(params)); @@ -157,20 +158,19 @@ static void i40e_client_release_qvlist(struct i40e_info *ldev) /** * i40e_notify_client_of_netdev_close - call the client close callback - * @vsi: the VSI with netdev closed + * @pf: PF device pointer * @reset: true when close called due to a reset pending * * If there is a client to this netdev, call the client with close **/ -void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset) +void i40e_notify_client_of_netdev_close(struct i40e_pf *pf, bool reset) { - struct i40e_pf *pf = vsi->back; struct i40e_client_instance *cdev = pf->cinst; if (!cdev || !cdev->client) return; if (!cdev->client->ops || !cdev->client->ops->close) { - dev_dbg(&vsi->back->pdev->dev, + dev_dbg(&pf->pdev->dev, "Cannot locate client instance close routine\n"); return; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e27b2aa544b6..aa874d6ff8c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11276,14 +11276,12 @@ static void i40e_service_task(struct work_struct *work) i40e_fdir_reinit_subtask(pf); if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) { /* Client subtask will reopen next time through. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], - true); + i40e_notify_client_of_netdev_close(pf, true); } else { i40e_client_subtask(pf); if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE, pf->state)) - i40e_notify_client_of_l2_param_changes( - pf->vsi[pf->lan_vsi]); + i40e_notify_client_of_l2_param_changes(pf); } i40e_sync_filters_subtask(pf); } else { @@ -16217,7 +16215,7 @@ static void i40e_remove(struct pci_dev *pdev) /* Client close must be called explicitly here because the timer * has been stopped. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); + i40e_notify_client_of_netdev_close(pf, false); i40e_fdir_teardown(pf); @@ -16476,7 +16474,7 @@ static void i40e_shutdown(struct pci_dev *pdev) /* Client close must be called explicitly here because the timer * has been stopped. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); + i40e_notify_client_of_netdev_close(pf, false); if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && pf->wol_en) @@ -16530,7 +16528,7 @@ static int i40e_suspend(struct device *dev) /* Client close must be called explicitly here because the timer * has been stopped. */ - i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); + i40e_notify_client_of_netdev_close(pf, false); if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && pf->wol_en) -- cgit v1.2.3 From 7033ada04e33048c8b33294fecbb0d73f3cd1088 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:04 +0200 Subject: i40e: Refactor argument of i40e_detect_recover_hung() Commit 07d44190a389 ("i40e/i40evf: Detect and recover hung queue scenario") changes i40e_detect_recover_hung() argument type from i40e_pf* to i40e_vsi* to be shareable by both i40e and i40evf. Because the i40evf does not exist anymore and the function is exclusively used by i40e we can revert this change. 
Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 10 ++++++---- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index aa874d6ff8c3..4291001d0053 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11267,7 +11267,7 @@ static void i40e_service_task(struct work_struct *work) return; if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) { - i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]); + i40e_detect_recover_hung(pf); i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index bc9e766d88cb..fa08b0297925 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -861,13 +861,15 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) /** * i40e_detect_recover_hung - Function to detect and recover hung_queues - * @vsi: pointer to vsi struct with tx queues + * @pf: pointer to PF struct * - * VSI has netdev and netdev has TX queues. This function is to check each of - * those TX queues if they are hung, trigger recovery by issuing SW interrupt. + * LAN VSI has netdev and netdev has TX queues. This function is to check + * each of those TX queues if they are hung, trigger recovery by issuing + * SW interrupt. **/ -void i40e_detect_recover_hung(struct i40e_vsi *vsi) +void i40e_detect_recover_hung(struct i40e_pf *pf) { + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_ring *tx_ring = NULL; struct net_device *netdev; unsigned int i; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2cdc7de6301c..7c26c9a2bf65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -470,7 +470,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring); int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); -void i40e_detect_recover_hung(struct i40e_vsi *vsi); +void i40e_detect_recover_hung(struct i40e_pf *pf); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, -- cgit v1.2.3 From 43f4466ca91debe63a07a80250282cf3210de1aa Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:05 +0200 Subject: i40e: Add helper to access main VSI Add simple helper i40e_pf_get_main_vsi(pf) to access main VSI that replaces pattern 'pf->vsi[pf->lan_vsi]' Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 11 +++ drivers/net/ethernet/intel/i40e/i40e_client.c | 10 +- drivers/net/ethernet/intel/i40e/i40e_ddp.c | 3 +- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 32 +++--- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 8 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 107 
++++++++++++--------- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 6 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 14 +-- 9 files changed, 116 insertions(+), 83 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 0792c7324527..58610a624a59 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1372,6 +1372,17 @@ i40e_pf_get_vsi_by_seid(struct i40e_pf *pf, u16 seid) return NULL; } +/** + * i40e_pf_get_main_vsi - get pointer to main VSI + * @pf: pointer to a PF + * + * Return: pointer to main VSI or NULL if it does not exist + **/ +static inline struct i40e_vsi *i40e_pf_get_main_vsi(struct i40e_pf *pf) +{ + return (pf->lan_vsi != I40E_NO_VSI) ? pf->vsi[pf->lan_vsi] : NULL; +} + /** * i40e_pf_get_veb_by_seid - find VEB by SEID * @pf: pointer to a PF diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 93e52138826e..59263551c383 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -107,8 +107,8 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len) **/ void i40e_notify_client_of_l2_param_changes(struct i40e_pf *pf) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_client_instance *cdev = pf->cinst; - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_params params; if (!cdev || !cdev->client) @@ -333,9 +333,9 @@ static int i40e_register_auxiliary_dev(struct i40e_info *ldev, const char *name) **/ static void i40e_client_add_instance(struct i40e_pf *pf) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_client_instance *cdev = NULL; struct netdev_hw_addr *mac = NULL; - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) @@ -399,9 +399,9 @@ void i40e_client_del_instance(struct i40e_pf *pf) **/ void i40e_client_subtask(struct i40e_pf *pf) { - struct i40e_client *client; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_client_instance *cdev; - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_client *client; int ret = 0; if (!test_and_clear_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state)) @@ -665,8 +665,8 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev, bool is_vf, u32 vf_id, u32 flag, u32 valid_flag) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(ldev->pf); struct i40e_pf *pf = ldev->pf; - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_vsi_context ctxt; bool update = true; int err; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c index 2f53f0f53bc3..daa9f2c42f70 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c @@ -407,8 +407,9 @@ static int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size, **/ static int i40e_ddp_restore(struct i40e_pf *pf) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + struct net_device *netdev = vsi->netdev; struct i40e_ddp_old_profile_list *entry; - struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev; int status = 0; if (!list_empty(&pf->ddp_old_prof)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 6147c5f128e8..09db46de2994 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -53,6 +53,7 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct i40e_pf *pf = filp->private_data; + struct i40e_vsi *main_vsi; int bytes_not_copied; int buf_size = 256; char *buf; @@ -68,8 +69,8 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, if (!buf) return -ENOSPC; - len = snprintf(buf, buf_size, "%s: %s\n", - pf->vsi[pf->lan_vsi]->netdev->name, + main_vsi = i40e_pf_get_main_vsi(pf); + len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, i40e_dbg_command_buf); bytes_not_copied = copy_to_user(buffer, buf, len); @@ -786,7 +787,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp, cnt = sscanf(&cmd_buf[7], "%i", &vsi_seid); if (cnt == 0) { /* default to PF VSI */ - vsi_seid = pf->vsi[pf->lan_vsi]->seid; + vsi = i40e_pf_get_main_vsi(pf); + vsi_seid = vsi->seid; } else if (vsi_seid < 0) { dev_info(&pf->pdev->dev, "add VSI %d: bad vsi seid\n", vsi_seid); @@ -1030,7 +1032,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } - vsi = pf->vsi[pf->lan_vsi]; + vsi = i40e_pf_get_main_vsi(pf); switch_id = le16_to_cpu(vsi->info.switch_id) & I40E_AQ_VSI_SW_ID_MASK; @@ -1380,6 +1382,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n", i40e_get_current_fd_count(pf)); } else if (strncmp(cmd_buf, "lldp", 4) == 0) { + /* Get main VSI */ + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + if (strncmp(&cmd_buf[5], "stop", 4) == 0) { int ret; @@ -1391,10 +1396,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, - pf->hw.mac.addr, - ETH_P_LLDP, 0, - pf->vsi[pf->lan_vsi]->seid, - 0, true, NULL, NULL); + pf->hw.mac.addr, ETH_P_LLDP, 0, + main_vsi->seid, 0, true, NULL, + NULL); if (ret) { dev_info(&pf->pdev->dev, "%s: Add Control Packet Filter AQ command failed =0x%x\n", @@ -1409,10 +1413,9 @@ static ssize_t i40e_dbg_command_write(struct file *filp, int ret; ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, - pf->hw.mac.addr, - ETH_P_LLDP, 0, - pf->vsi[pf->lan_vsi]->seid, - 0, false, NULL, NULL); + pf->hw.mac.addr, ETH_P_LLDP, 0, + main_vsi->seid, 0, false, NULL, + NULL); if (ret) { dev_info(&pf->pdev->dev, "%s: Remove Control Packet Filter AQ command failed =0x%x\n", @@ -1639,6 +1642,7 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct i40e_pf *pf = filp->private_data; + struct i40e_vsi *main_vsi; int bytes_not_copied; int buf_size = 256; char *buf; @@ -1654,8 +1658,8 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, if (!buf) return -ENOSPC; - len = snprintf(buf, buf_size, "%s: %s\n", - pf->vsi[pf->lan_vsi]->netdev->name, + main_vsi = i40e_pf_get_main_vsi(pf); + len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, i40e_dbg_netdev_ops_buf); bytes_not_copied = copy_to_user(buffer, buf, len); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 42e7e6cdaa6d..0905c1fb2337 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2029,7 +2029,7 @@ static void i40e_get_ringparam(struct net_device *netdev, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; - 
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); ring->rx_max_pending = i40e_get_max_num_descriptors(pf); ring->tx_max_pending = i40e_get_max_num_descriptors(pf); @@ -3370,6 +3370,7 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, struct i40e_rx_flow_userdef userdef = {0}; struct i40e_fdir_filter *rule = NULL; struct hlist_node *node2; + struct i40e_vsi *vsi; u64 input_set; u16 index; @@ -3493,9 +3494,8 @@ no_input_set: fsp->flow_type |= FLOW_EXT; } - if (rule->dest_vsi != pf->vsi[pf->lan_vsi]->id) { - struct i40e_vsi *vsi; - + vsi = i40e_pf_get_main_vsi(pf); + if (rule->dest_vsi != vsi->id) { vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi); if (vsi && vsi->type == I40E_VSI_SRIOV) { /* VFs are zero-indexed by the driver, but ethtool diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4291001d0053..e45a556b19bb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2476,7 +2476,7 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, **/ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; int aq_ret; @@ -4323,7 +4323,7 @@ static irqreturn_t i40e_intr(int irq, void *data) /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */ if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_q_vector *q_vector = vsi->q_vectors[0]; /* We do not have a way to disarm Queue causes while leaving @@ -5473,7 +5473,7 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) **/ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 num_tc = vsi->mqprio_qopt.qopt.num_tc; u8 enabled_tc = 1, i; @@ -5490,13 +5490,14 @@ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) **/ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) { - struct i40e_hw *hw = &pf->hw; u8 i, enabled_tc = 1; u8 num_tc = 0; - struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; - if (i40e_is_tc_mqprio_enabled(pf)) - return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; + if (i40e_is_tc_mqprio_enabled(pf)) { + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + + return vsi->mqprio_qopt.qopt.num_tc; + } /* If neither MQPRIO nor DCB is enabled, then always use single TC */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) @@ -5504,7 +5505,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) /* SFP mode will be enabled for all TCs on port */ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) - return i40e_dcb_get_num_tc(dcbcfg); + return i40e_dcb_get_num_tc(&pf->hw.local_dcbx_config); /* MFP mode return count of enabled TCs for this PF */ if (pf->hw.func_caps.iscsi) @@ -6478,6 +6479,7 @@ static inline int i40e_setup_hw_channel(struct i40e_pf *pf, static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch) { + struct i40e_vsi *main_vsi; u8 vsi_type; u16 seid; int ret; @@ -6491,7 +6493,8 @@ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, } /* underlying switching element */ - seid = pf->vsi[pf->lan_vsi]->uplink_seid; + main_vsi = i40e_pf_get_main_vsi(pf); + seid = main_vsi->uplink_seid; /* create channel (VSI), configure TX rings */ ret = 
i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); @@ -7048,7 +7051,9 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) /* Configure Rx Packet Buffers in HW */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - mfs_tc[i] = pf->vsi[pf->lan_vsi]->netdev->mtu; + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + + mfs_tc[i] = main_vsi->netdev->mtu; mfs_tc[i] += I40E_PACKET_HDR_PAD; } @@ -9805,7 +9810,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n"); } else { /* replay sideband filters */ - i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); + i40e_fdir_filter_restore(i40e_pf_get_main_vsi(pf)); if (!disable_atr && !pf->fd_tcp4_filter_cnt) clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); @@ -9903,7 +9908,7 @@ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) **/ static void i40e_link_event(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 new_link_speed, old_link_speed; bool new_link, old_link; int status; @@ -10274,7 +10279,7 @@ static void i40e_verify_eeprom(struct i40e_pf *pf) **/ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; @@ -10310,7 +10315,7 @@ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) **/ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; @@ -10386,7 +10391,7 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb) if (veb->uplink_seid == pf->mac_seid) { /* Check that the LAN VSI has VEB owning flag set */ - ctl_vsi = pf->vsi[pf->lan_vsi]; + ctl_vsi = i40e_pf_get_main_vsi(pf); if (WARN_ON(ctl_vsi->veb_idx != veb->idx || !(ctl_vsi->flags & I40E_VSI_FLAG_VEB_OWNER))) { @@ -10529,7 +10534,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi); **/ static void i40e_fdir_sb_setup(struct i40e_pf *pf) { - struct i40e_vsi *vsi; + struct i40e_vsi *main_vsi, *vsi; /* quick workaround for an NVM issue that leaves a critical register * uninitialized @@ -10554,8 +10559,8 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) /* create a new VSI if none exists */ if (!vsi) { - vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, - pf->vsi[pf->lan_vsi]->seid, 0); + main_vsi = i40e_pf_get_main_vsi(pf); + vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, main_vsi->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); @@ -10834,7 +10839,7 @@ static int i40e_reset(struct i40e_pf *pf) static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; struct i40e_veb *veb; int ret; @@ -10843,7 +10848,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && is_recovery_mode_reported) - i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); + i40e_set_ethtool_ops(vsi->netdev); if (test_bit(__I40E_DOWN, pf->state) && !test_bit(__I40E_RECOVERY_MODE, pf->state)) @@ -12395,7 +12400,7 @@ void i40e_fill_rss_lut(struct i40e_pf 
*pf, u8 *lut, **/ static int i40e_pf_config_rss(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 seed[I40E_HKEY_ARRAY_SIZE]; u8 *lut; struct i40e_hw *hw = &pf->hw; @@ -12467,7 +12472,7 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) **/ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); int new_rss_size; if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) @@ -13756,9 +13761,10 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) * the end, which is 4 bytes long, so force truncation of the * original name by IFNAMSIZ - 4 */ - snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d", - IFNAMSIZ - 4, - pf->vsi[pf->lan_vsi]->netdev->name); + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); + + snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d", IFNAMSIZ - 4, + main_vsi->netdev->name); eth_random_addr(mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); @@ -14270,6 +14276,7 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { + struct i40e_vsi *main_vsi; u16 alloc_queue_pairs; struct i40e_pf *pf; u8 enabled_tc; @@ -14304,10 +14311,12 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) /* Update the FW view of the VSI. Force a reset of TC and queue * layout configurations. */ - enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc; - pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0; - pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid; - i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc); + main_vsi = i40e_pf_get_main_vsi(pf); + enabled_tc = main_vsi->tc_config.enabled_tc; + main_vsi->tc_config.enabled_tc = 0; + main_vsi->seid = pf->main_vsi_seid; + i40e_vsi_config_tc(main_vsi, enabled_tc); + if (vsi->type == I40E_VSI_MAIN) i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr); @@ -14990,6 +14999,7 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig) **/ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired) { + struct i40e_vsi *main_vsi; u16 flags = 0; int ret; @@ -15034,8 +15044,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui } /* first time setup */ - if (pf->lan_vsi == I40E_NO_VSI || reinit) { - struct i40e_vsi *vsi = NULL; + main_vsi = i40e_pf_get_main_vsi(pf); + if (!main_vsi || reinit) { u16 uplink_seid; /* Set up the PF VSI associated with the PF's main VSI @@ -15045,11 +15055,12 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui uplink_seid = pf->veb[pf->lan_veb]->seid; else uplink_seid = pf->mac_seid; - if (pf->lan_vsi == I40E_NO_VSI) - vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0); + if (!main_vsi) + main_vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, + uplink_seid, 0); else if (reinit) - vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]); - if (!vsi) { + main_vsi = i40e_vsi_reinit_setup(main_vsi); + if (!main_vsi) { dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n"); i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); @@ -15057,13 +15068,13 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui } } else { /* force a reset of TC and queue layout configurations */ - u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc; + u8 enabled_tc = main_vsi->tc_config.enabled_tc; - pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0; - pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid; - 
i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc); + main_vsi->tc_config.enabled_tc = 0; + main_vsi->seid = pf->main_vsi_seid; + i40e_vsi_config_tc(main_vsi, enabled_tc); } - i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]); + i40e_vlan_stripping_disable(main_vsi); i40e_fdir_sb_setup(pf); @@ -15090,7 +15101,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui rtnl_lock(); /* repopulate tunnel port filters */ - udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); + udp_tunnel_nic_reset_ntf(main_vsi->netdev); if (!lock_acquired) rtnl_unlock(); @@ -15234,6 +15245,7 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) #define REMAIN(__x) (INFO_STRING_LEN - (__x)) static void i40e_print_features(struct i40e_pf *pf) { + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; char *buf; int i; @@ -15247,8 +15259,7 @@ static void i40e_print_features(struct i40e_pf *pf) i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", - pf->hw.func_caps.num_vsis, - pf->vsi[pf->lan_vsi]->num_queue_pairs); + pf->hw.func_caps.num_vsis, main_vsi->num_queue_pairs); if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " RSS"); if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) @@ -15912,7 +15923,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; } - INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list); + + vsi = i40e_pf_get_main_vsi(pf); + INIT_LIST_HEAD(&vsi->ch_list); /* if FDIR VSI was set up, start it now */ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); @@ -16414,15 +16427,15 @@ static void i40e_pci_error_resume(struct pci_dev *pdev) **/ static void i40e_enable_mc_magic_wake(struct i40e_pf *pf) { + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; u8 mac_addr[6]; u16 flags = 0; int ret; /* Get current MAC address in case it's an LAA */ - if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) { - ether_addr_copy(mac_addr, - pf->vsi[pf->lan_vsi]->netdev->dev_addr); + if (main_vsi && main_vsi->netdev) { + ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr); } else { dev_err(&pf->pdev->dev, "Failed to retrieve MAC address; using default\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index e7ebcb09f23c..b72a4b5d76b9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -1472,7 +1472,8 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf) **/ void i40e_ptp_init(struct i40e_pf *pf) { - struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + struct net_device *netdev = vsi->netdev; struct i40e_hw *hw = &pf->hw; u32 pf_id; long err; @@ -1536,6 +1537,7 @@ void i40e_ptp_init(struct i40e_pf *pf) **/ void i40e_ptp_stop(struct i40e_pf *pf) { + struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; u32 regval; @@ -1555,7 +1557,7 @@ void i40e_ptp_stop(struct i40e_pf *pf) ptp_clock_unregister(pf->ptp_clock); pf->ptp_clock = NULL; dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__, - pf->vsi[pf->lan_vsi]->netdev->name); + main_vsi->netdev->name); } if (i40e_is_ptp_pin_dev(&pf->hw)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index fa08b0297925..c006f716a3bd 
100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -24,7 +24,7 @@ static void i40e_fdir(struct i40e_ring *tx_ring, { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; - u32 flex_ptype, dtype_cmd; + u32 flex_ptype, dtype_cmd, vsi_id; u16 i; /* grab the next descriptor */ @@ -42,8 +42,8 @@ static void i40e_fdir(struct i40e_ring *tx_ring, flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_PCTYPE_MASK, fdata->pctype); /* Use LAN VSI Id if not programmed by user */ - flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_DEST_VSI_MASK, - fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id); + vsi_id = fdata->dest_vsi ? : i40e_pf_get_main_vsi(pf)->id; + flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_DEST_VSI_MASK, vsi_id); dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; @@ -869,7 +869,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) **/ void i40e_detect_recover_hung(struct i40e_pf *pf) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_ring *tx_ring = NULL; struct net_device *netdev; unsigned int i; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 232b65b9c8ea..662622f01e31 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -795,13 +795,13 @@ error_param: static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx) { struct i40e_mac_filter *f = NULL; + struct i40e_vsi *main_vsi, *vsi; struct i40e_pf *pf = vf->pf; - struct i40e_vsi *vsi; u64 max_tx_rate = 0; int ret = 0; - vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid, - vf->vf_id); + main_vsi = i40e_pf_get_main_vsi(pf); + vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, main_vsi->seid, vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, @@ -3322,8 +3322,9 @@ error_param: static int i40e_vc_rdma_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) { struct i40e_pf *pf = vf->pf; - int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id; + struct i40e_vsi *main_vsi; int aq_ret = 0; + int abs_vf_id; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !test_bit(I40E_VF_STATE_RDMAENA, &vf->vf_states)) { @@ -3331,8 +3332,9 @@ static int i40e_vc_rdma_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id, - msg, msglen); + main_vsi = i40e_pf_get_main_vsi(pf); + abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id; + i40e_notify_client_of_vf_msg(main_vsi, abs_vf_id, msg, msglen); error_param: /* send the response to the VF */ -- cgit v1.2.3 From 6c8e355ea5fccf686703d71219f3b96ec7ec923f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:06 +0200 Subject: i40e: Consolidate checks whether given VSI is main In the driver code there are 3 types of checks whether given VSI is main or not: 1. vsi->type ==/!= I40E_VSI_MAIN 2. vsi ==/!= pf->vsi[pf->lan_vsi] 3. vsi->seid ==/!= pf->vsi[pf->lan_vsi]->seid All of them are equivalent and can be consolidated. Convert cases 2 and 3 to case 1. 
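As a side note, the equivalence the message relies on can be written out explicitly. The sketch below is illustration only, not code from the patch; the helper name is invented and it merely restates the three checks listed above for the main (LAN) VSI.

/* Illustration only: all three historical tests single out the main (LAN)
 * VSI; the patch below standardizes on the type check.
 */
static inline bool example_vsi_is_main(const struct i40e_vsi *vsi,
				       const struct i40e_pf *pf)
{
	bool by_type = vsi->type == I40E_VSI_MAIN;               /* form 1, kept    */
	bool by_ptr  = vsi == pf->vsi[pf->lan_vsi];              /* form 2, dropped */
	bool by_seid = vsi->seid == pf->vsi[pf->lan_vsi]->seid;  /* form 3, dropped */

	/* for the main VSI all three agree, so any one of them suffices */
	return by_type && by_ptr && by_seid;
}

The i40e_pf_get_main_vsi() helper introduced earlier in this series encapsulates the lookup side of the same relationship.
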
Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 ++++++------ drivers/net/ethernet/intel/i40e/i40e_main.c | 19 +++++++++---------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 09db46de2994..abf624d770e6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -129,7 +129,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " state[%d] = %08lx\n", i, vsi->state[i]); - if (vsi == pf->vsi[pf->lan_vsi]) + if (vsi->type == I40E_VSI_MAIN) dev_info(&pf->pdev->dev, " MAC address: %pM Port MAC: %pM\n", pf->hw.mac.addr, pf->hw.mac.port_addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 0905c1fb2337..5cd0d1b45f01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1241,7 +1241,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, i40e_partition_setting_complaint(pf); return -EOPNOTSUPP; } - if (vsi != pf->vsi[pf->lan_vsi]) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET && hw->phy.media_type != I40E_MEDIA_TYPE_FIBER && @@ -1710,7 +1710,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, return -EOPNOTSUPP; } - if (vsi != pf->vsi[pf->lan_vsi]) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; is_an = hw_link_info->an_info & I40E_AQ_AN_COMPLETED; @@ -2292,7 +2292,7 @@ static int i40e_get_stats_count(struct net_device *netdev) struct i40e_pf *pf = vsi->back; int stats_len; - if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) + if (vsi->type == I40E_VSI_MAIN && pf->hw.partition_id == 1) stats_len = I40E_PF_STATS_LEN; else stats_len = I40E_VSI_STATS_LEN; @@ -2422,7 +2422,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, } rcu_read_unlock(); - if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) + if (vsi->type != I40E_VSI_MAIN || pf->hw.partition_id != 1) goto check_data_pointer; veb_stats = ((pf->lan_veb != I40E_NO_VEB) && @@ -2495,7 +2495,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data) "rx", i); } - if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) + if (vsi->type != I40E_VSI_MAIN || pf->hw.partition_id != 1) goto check_data_pointer; i40e_add_stat_strings(&data, i40e_gstrings_veb_stats); @@ -2792,7 +2792,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return -EOPNOTSUPP; } - if (vsi != pf->vsi[pf->lan_vsi]) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* NVM bit on means WoL disabled for the port */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e45a556b19bb..85d609ad81dc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -990,7 +990,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) ns->tx_dropped = es->tx_discards; /* pull in a couple PF stats if this is the main vsi */ - if (vsi == pf->vsi[pf->lan_vsi]) { + if (vsi->type == I40E_VSI_MAIN) { ns->rx_crc_errors = pf->stats.crc_errors; 
ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes; ns->rx_length_errors = pf->stats.rx_length_errors; @@ -1235,7 +1235,7 @@ void i40e_update_stats(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - if (vsi == pf->vsi[pf->lan_vsi]) + if (vsi->type == I40E_VSI_MAIN) i40e_update_pf_stats(pf); i40e_update_vsi_stats(vsi); @@ -6812,7 +6812,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf) /* - Enable all TCs for the LAN VSI * - For all others keep them at TC0 for now */ - if (v == pf->lan_vsi) + if (vsi->type == I40E_VSI_MAIN) tc_map = i40e_pf_get_tc_map(pf); else tc_map = I40E_DEFAULT_TRAFFIC_CLASS; @@ -9119,7 +9119,7 @@ err_setup_rx: i40e_vsi_free_rx_resources(vsi); err_setup_tx: i40e_vsi_free_tx_resources(vsi); - if (vsi == pf->vsi[pf->lan_vsi]) + if (vsi->type == I40E_VSI_MAIN) i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return err; @@ -11994,7 +11994,7 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) /* if not MSIX, give the one vector only to the LAN VSI */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_q_vectors = vsi->num_q_vectors; - else if (vsi == pf->vsi[pf->lan_vsi]) + else if (vsi->type == I40E_VSI_MAIN) num_q_vectors = 1; else return -EINVAL; @@ -13111,7 +13111,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, int rem; /* Only for PF VSI for now */ - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for PF VSI */ @@ -13179,7 +13179,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct i40e_veb *veb; /* Only for PF VSI for now */ - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) + if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for the PF VSI */ @@ -14131,8 +14131,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) vsi->seid, vsi->uplink_seid); return -ENODEV; } - if (vsi == pf->vsi[pf->lan_vsi] && - !test_bit(__I40E_DOWN, pf->state)) { + if (vsi->type == I40E_VSI_MAIN && !test_bit(__I40E_DOWN, pf->state)) { dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } @@ -14396,7 +14395,7 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { - if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) { + if (vsi->type != I40E_VSI_MAIN) { dev_info(&vsi->back->pdev->dev, "New VSI creation error, uplink seid of LAN VSI expected.\n"); return NULL; -- cgit v1.2.3 From 5509fc9e3ab6938910626dccdd3d59671086edbf Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:07 +0200 Subject: i40e: Add helper to access main VEB Add a helper to access main VEB: i40e_pf_get_main_veb(pf) replaces 'pf->veb[pf->lan_veb]' Reviewed-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 11 +++++++++++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 9 +++------ drivers/net/ethernet/intel/i40e/i40e_main.c | 27 ++++++++++++++++---------- 3 files changed, 31 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 58610a624a59..bca2084cc54b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1401,4 +1401,15 @@ i40e_pf_get_veb_by_seid(struct i40e_pf *pf, u16 seid) return NULL; } +/** + * i40e_pf_get_main_veb - get 
pointer to main VEB + * @pf: pointer to a PF + * + * Return: pointer to main VEB or NULL if it does not exist + **/ +static inline struct i40e_veb *i40e_pf_get_main_veb(struct i40e_pf *pf) +{ + return (pf->lan_veb != I40E_NO_VEB) ? pf->veb[pf->lan_veb] : NULL; +} + #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5cd0d1b45f01..4e28785c9fb2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2425,14 +2425,11 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, if (vsi->type != I40E_VSI_MAIN || pf->hw.partition_id != 1) goto check_data_pointer; - veb_stats = ((pf->lan_veb != I40E_NO_VEB) && - (pf->lan_veb < I40E_MAX_VEB) && - test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)); + veb = i40e_pf_get_main_veb(pf); + veb_stats = veb && test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); - if (veb_stats) { - veb = pf->veb[pf->lan_veb]; + if (veb_stats) i40e_update_veb_stats(veb); - } /* If veb stats aren't enabled, pass NULL instead of the veb so that * we initialize stats to zero and update the data pointer diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 85d609ad81dc..26e5c21df19d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2481,7 +2481,7 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) int aq_ret; if (vsi->type == I40E_VSI_MAIN && - pf->lan_veb != I40E_NO_VEB && + i40e_pf_get_main_veb(pf) && !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN @@ -9909,6 +9909,7 @@ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) static void i40e_link_event(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); + struct i40e_veb *veb = i40e_pf_get_main_veb(pf); u8 new_link_speed, old_link_speed; bool new_link, old_link; int status; @@ -9948,8 +9949,8 @@ static void i40e_link_event(struct i40e_pf *pf) /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. */ - if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) - i40e_veb_link_event(pf->veb[pf->lan_veb], new_link); + if (veb) + i40e_veb_link_event(veb, new_link); else i40e_vsi_link_event(vsi, new_link); @@ -14881,7 +14882,8 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf, /* Main VEB? 
*/ if (uplink_seid != pf->mac_seid) break; - if (pf->lan_veb >= I40E_MAX_VEB) { + veb = i40e_pf_get_main_veb(pf); + if (!veb) { int v; /* find existing or else empty VEB */ @@ -14895,12 +14897,15 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf, pf->lan_veb = v; } } - if (pf->lan_veb >= I40E_MAX_VEB) + + /* Try to get again main VEB as pf->lan_veb may have changed */ + veb = i40e_pf_get_main_veb(pf); + if (!veb) break; - pf->veb[pf->lan_veb]->seid = seid; - pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid; - pf->veb[pf->lan_veb]->pf = pf; + veb->seid = seid; + veb->uplink_seid = pf->mac_seid; + veb->pf = pf; break; case I40E_SWITCH_ELEMENT_TYPE_VSI: if (num_reported != 1) @@ -15045,13 +15050,15 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui /* first time setup */ main_vsi = i40e_pf_get_main_vsi(pf); if (!main_vsi || reinit) { + struct i40e_veb *veb; u16 uplink_seid; /* Set up the PF VSI associated with the PF's main VSI * that is already in the HW switch */ - if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) - uplink_seid = pf->veb[pf->lan_veb]->seid; + veb = i40e_pf_get_main_veb(pf); + if (veb) + uplink_seid = veb->seid; else uplink_seid = pf->mac_seid; if (!main_vsi) -- cgit v1.2.3 From 29385de339564b2f5bd2ad65eae8ded80d0ad854 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Apr 2024 09:26:08 +0200 Subject: i40e: Add and use helper to reconfigure TC for given VSI Add helper i40e_vsi_reconfig_tc(vsi) that configures TC for given VSI using previously stored TC bitmap. Effectively replaces open-coded patterns: enabled_tc = vsi->tc_config.enabled_tc; vsi->tc_config.enabled_tc = 0; i40e_vsi_config_tc(vsi, enabled_tc); Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 32 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 26e5c21df19d..2cc7bec0557b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5917,6 +5917,28 @@ out: return ret; } +/** + * i40e_vsi_reconfig_tc - Reconfigure VSI Tx Scheduler for stored TC map + * @vsi: VSI to be reconfigured + * + * This reconfigures a particular VSI for TCs that are mapped to the + * TC bitmap stored previously for the VSI. + * + * Context: It is expected that the VSI queues have been quisced before + * calling this function. + * + * Return: 0 on success, negative value on failure + **/ +static int i40e_vsi_reconfig_tc(struct i40e_vsi *vsi) +{ + u8 enabled_tc; + + enabled_tc = vsi->tc_config.enabled_tc; + vsi->tc_config.enabled_tc = 0; + + return i40e_vsi_config_tc(vsi, enabled_tc); +} + /** * i40e_get_link_speed - Returns link speed for the interface * @vsi: VSI to be configured @@ -14279,7 +14301,6 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) struct i40e_vsi *main_vsi; u16 alloc_queue_pairs; struct i40e_pf *pf; - u8 enabled_tc; int ret; if (!vsi) @@ -14312,10 +14333,8 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) * layout configurations. 
*/ main_vsi = i40e_pf_get_main_vsi(pf); - enabled_tc = main_vsi->tc_config.enabled_tc; - main_vsi->tc_config.enabled_tc = 0; main_vsi->seid = pf->main_vsi_seid; - i40e_vsi_config_tc(main_vsi, enabled_tc); + i40e_vsi_reconfig_tc(main_vsi); if (vsi->type == I40E_VSI_MAIN) i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr); @@ -15074,11 +15093,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui } } else { /* force a reset of TC and queue layout configurations */ - u8 enabled_tc = main_vsi->tc_config.enabled_tc; - - main_vsi->tc_config.enabled_tc = 0; main_vsi->seid = pf->main_vsi_seid; - i40e_vsi_config_tc(main_vsi, enabled_tc); + i40e_vsi_reconfig_tc(main_vsi); } i40e_vlan_stripping_disable(main_vsi); -- cgit v1.2.3 From dcb3fba6fa34dd2f98bfcd1d20b26fab3842d1ee Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 30 Apr 2024 17:36:34 +0530 Subject: net: ti: icssg_prueth: Add SW TX / RX Coalescing based on hrtimers Add SW IRQ coalescing based on hrtimers for RX and TX data path for ICSSG driver, which can be enabled by ethtool commands: - RX coalescing ethtool -C eth1 rx-usecs 50 - TX coalescing can be enabled per TX queue - by default enables coalescing for TX0 ethtool -C eth1 tx-usecs 50 - configure TX0 ethtool -Q eth0 queue_mask 1 --coalesce tx-usecs 100 - configure TX1 ethtool -Q eth0 queue_mask 2 --coalesce tx-usecs 100 - configure TX0 and TX1 ethtool -Q eth0 queue_mask 3 --coalesce tx-usecs 100 --coalesce tx-usecs 100 Minimum value for both rx-usecs and tx-usecs is 20us. Compared to gro_flush_timeout and napi_defer_hard_irqs this patch allows to enable IRQ coalescing for RX path separately. Benchmarking numbers: =============================================================== | Method | Tput_TX | CPU_TX | Tput_RX | CPU_RX | | ============================================================== | Default Driver 943 Mbps 31% 517 Mbps 38% | | IRQ Coalescing (Patch) 943 Mbps 28% 518 Mbps 25% | =============================================================== Signed-off-by: MD Danish Anwar Reviewed-by: Andrew Lunn Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240430120634.1558998-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 41 ++++++++++-- drivers/net/ethernet/ti/icssg/icssg_ethtool.c | 93 +++++++++++++++++++++++++++ drivers/net/ethernet/ti/icssg/icssg_prueth.c | 18 +++++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 11 +++- 4 files changed, 155 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 284f97876054..088ab8076db4 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -122,7 +122,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, } int emac_tx_complete_packets(struct prueth_emac *emac, int chn, - int budget) + int budget, bool *tdown) { struct net_device *ndev = emac->ndev; struct cppi5_host_desc_t *desc_tx; @@ -145,6 +145,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, if (cppi5_desc_is_tdcm(desc_dma)) { if (atomic_dec_and_test(&emac->tdown_cnt)) complete(&emac->tdown_complete); + *tdown = true; break; } @@ -190,19 +191,37 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, return num_tx; } +static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer) +{ + struct prueth_tx_chn *tx_chns = + container_of(timer, struct prueth_tx_chn, tx_hrtimer); + 
+ enable_irq(tx_chns->irq); + return HRTIMER_NORESTART; +} + static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget) { struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx); struct prueth_emac *emac = tx_chn->emac; + bool tdown = false; int num_tx_packets; - num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget); + num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget, + &tdown); if (num_tx_packets >= budget) return budget; - if (napi_complete_done(napi_tx, num_tx_packets)) - enable_irq(tx_chn->irq); + if (napi_complete_done(napi_tx, num_tx_packets)) { + if (unlikely(tx_chn->tx_pace_timeout_ns && !tdown)) { + hrtimer_start(&tx_chn->tx_hrtimer, + ns_to_ktime(tx_chn->tx_pace_timeout_ns), + HRTIMER_MODE_REL_PINNED); + } else { + enable_irq(tx_chn->irq); + } + } return num_tx_packets; } @@ -226,6 +245,9 @@ int prueth_ndev_add_tx_napi(struct prueth_emac *emac) struct prueth_tx_chn *tx_chn = &emac->tx_chns[i]; netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll); + hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + tx_chn->tx_hrtimer.function = &emac_tx_timer_callback; ret = request_irq(tx_chn->irq, prueth_tx_irq, IRQF_TRIGGER_HIGH, tx_chn->name, tx_chn); @@ -871,8 +893,15 @@ int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget) break; } - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) - enable_irq(emac->rx_chns.irq[rx_flow]); + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { + if (unlikely(emac->rx_pace_timeout_ns)) { + hrtimer_start(&emac->rx_hrtimer, + ns_to_ktime(emac->rx_pace_timeout_ns), + HRTIMER_MODE_REL_PINNED); + } else { + enable_irq(emac->rx_chns.irq[rx_flow]); + } + } return num_rx; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index ca20325d4d3e..c8d0f45cc5b1 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -201,6 +201,93 @@ static void emac_get_rmon_stats(struct net_device *ndev, rmon_stats->hist_tx[4] = emac_get_stat_by_name(emac, "tx_bucket5_frames"); } +static int emac_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth_tx_chn *tx_chn; + + tx_chn = &emac->tx_chns[0]; + + coal->rx_coalesce_usecs = emac->rx_pace_timeout_ns / 1000; + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000; + + return 0; +} + +static int emac_get_per_queue_coalesce(struct net_device *ndev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth_tx_chn *tx_chn; + + if (queue >= PRUETH_MAX_TX_QUEUES) + return -EINVAL; + + tx_chn = &emac->tx_chns[queue]; + + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000; + + return 0; +} + +static int emac_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + struct prueth_tx_chn *tx_chn; + + tx_chn = &emac->tx_chns[0]; + + if (coal->rx_coalesce_usecs && + coal->rx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { + dev_info(prueth->dev, "defaulting to min value of %dus for rx-usecs\n", + ICSSG_MIN_COALESCE_USECS); + coal->rx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; + } + + if 
(coal->tx_coalesce_usecs && + coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { + dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs\n", + ICSSG_MIN_COALESCE_USECS); + coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; + } + + emac->rx_pace_timeout_ns = coal->rx_coalesce_usecs * 1000; + tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000; + + return 0; +} + +static int emac_set_per_queue_coalesce(struct net_device *ndev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + struct prueth_tx_chn *tx_chn; + + if (queue >= PRUETH_MAX_TX_QUEUES) + return -EINVAL; + + tx_chn = &emac->tx_chns[queue]; + + if (coal->tx_coalesce_usecs && + coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) { + dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs for tx-%u\n", + ICSSG_MIN_COALESCE_USECS, queue); + coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS; + } + + tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000; + + return 0; +} + const struct ethtool_ops icssg_ethtool_ops = { .get_drvinfo = emac_get_drvinfo, .get_msglevel = emac_get_msglevel, @@ -209,6 +296,12 @@ const struct ethtool_ops icssg_ethtool_ops = { .get_ethtool_stats = emac_get_ethtool_stats, .get_strings = emac_get_strings, .get_ts_info = emac_get_ts_info, + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_TX_USECS, + .get_coalesce = emac_get_coalesce, + .set_coalesce = emac_set_coalesce, + .get_per_queue_coalesce = emac_get_per_queue_coalesce, + .set_per_queue_coalesce = emac_set_per_queue_coalesce, .get_channels = emac_get_channels, .set_channels = emac_set_channels, .get_link_ksettings = emac_get_link_ksettings, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 186b0365c2e5..7c9e9518f555 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -243,6 +243,16 @@ static void emac_adjust_link(struct net_device *ndev) } } +static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer) +{ + struct prueth_emac *emac = + container_of(timer, struct prueth_emac, rx_hrtimer); + int rx_flow = PRUETH_RX_FLOW_DATA; + + enable_irq(emac->rx_chns.irq[rx_flow]); + return HRTIMER_NORESTART; +} + static int emac_phy_connect(struct prueth_emac *emac) { struct prueth *prueth = emac->prueth; @@ -582,8 +592,10 @@ static int emac_ndo_stop(struct net_device *ndev) netdev_err(ndev, "tx teardown timeout\n"); prueth_reset_tx_chan(emac, emac->tx_ch_num, true); - for (i = 0; i < emac->tx_ch_num; i++) + for (i = 0; i < emac->tx_ch_num; i++) { napi_disable(&emac->tx_chns[i].napi_tx); + hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); + } max_rx_flows = PRUETH_MAX_RX_FLOWS; k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); @@ -591,6 +603,7 @@ static int emac_ndo_stop(struct net_device *ndev) prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); napi_disable(&emac->napi_rx); + hrtimer_cancel(&emac->rx_hrtimer); cancel_work_sync(&emac->rx_mode_work); @@ -801,6 +814,9 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->features = ndev->hw_features; netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll); + hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + emac->rx_hrtimer.function = &emac_rx_timer_callback; prueth->emac[mac] = emac; return 0; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h 
b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 82e38ef5635b..a78c5eb75fb8 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -108,6 +108,8 @@ struct prueth_tx_chn { u32 descs_num; unsigned int irq; char name[32]; + struct hrtimer tx_hrtimer; + unsigned long tx_pace_timeout_ns; }; struct prueth_rx_chn { @@ -127,6 +129,9 @@ struct prueth_rx_chn { #define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */ +/* Minimum coalesce time in usecs for both Tx and Rx */ +#define ICSSG_MIN_COALESCE_USECS 20 + /* data for each emac port */ struct prueth_emac { bool is_sr1; @@ -183,6 +188,10 @@ struct prueth_emac { struct delayed_work stats_work; u64 stats[ICSSG_NUM_STATS]; + + /* RX IRQ Coalescing Related */ + struct hrtimer rx_hrtimer; + unsigned long rx_pace_timeout_ns; }; /** @@ -320,7 +329,7 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num); void prueth_xmit_free(struct prueth_tx_chn *tx_chn, struct cppi5_host_desc_t *desc); int emac_tx_complete_packets(struct prueth_emac *emac, int chn, - int budget); + int budget, bool *tdown); int prueth_ndev_add_tx_napi(struct prueth_emac *emac); int prueth_init_tx_chns(struct prueth_emac *emac); int prueth_init_rx_chns(struct prueth_emac *emac, -- cgit v1.2.3 From a75fbb3aa47a62d76d8b07b49db9e2f0e08fbba7 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Tue, 30 Apr 2024 17:30:51 -0700 Subject: bnxt_en: share NQ ring sw_stats memory with subrings On P5_PLUS chips and later, the NQ rings have subrings for RX and TX completions respectively. These subrings are passed to the poll function instead of the base NQ, but each ring carries its own copy of the software ring statistics. For stats to be conveniently accessible in __bnxt_poll_work(), the statistics memory should either be shared between the NQ and its subrings or the subrings need to be included in the ethtool stats aggregation logic. This patch opts for the former, because it's more efficient and less confusing having the software statistics for a ring exist in a single place. Before this patch, the counter will not be displayed if the "wrong" cpr->sw_stats was used to increment a counter. 
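In short, struct bnxt_cp_ring_info now carries a pointer instead of an embedded structure:

| struct bnxt_sw_stats *sw_stats;

The NQ allocates it in bnxt_alloc_stats():

| cpr->sw_stats = kzalloc(sizeof(*cpr->sw_stats), GFP_KERNEL);

and each RX/TX completion subring aliases the parent NQ's copy in bnxt_alloc_cp_rings():

| cpr2->sw_stats = cpr->sw_stats;

so a counter incremented through either ring lands in the same memory.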
Link: https://lore.kernel.org/netdev/CACKFLikEhVAJA+osD7UjQNotdGte+fth7zOy7yDdLkTyFk9Pyw@mail.gmail.com/ Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 40 ++++++++++++++--------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +-- 3 files changed, 27 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index be96bb494ae6..98bff01b89ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1811,7 +1811,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + cpr->sw_stats->rx.rx_oom_discards += 1; return NULL; } } else { @@ -1821,7 +1821,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + cpr->sw_stats->rx.rx_oom_discards += 1; return NULL; } @@ -1837,7 +1837,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!skb) { skb_free_frag(data); bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + cpr->sw_stats->rx.rx_oom_discards += 1; return NULL; } skb_reserve(skb, bp->rx_offset); @@ -1848,7 +1848,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). 
*/ - cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + cpr->sw_stats->rx.rx_oom_discards += 1; return NULL; } } @@ -2106,7 +2106,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { - bnapi->cp_ring.sw_stats.rx.rx_buf_errors++; + bnapi->cp_ring.sw_stats->rx.rx_buf_errors++; if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) { netdev_warn_once(bp->dev, "RX buffer error %x\n", @@ -2222,7 +2222,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } else { if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L4_CS_ERR_BITS) { if (dev->features & NETIF_F_RXCSUM) - bnapi->cp_ring.sw_stats.rx.rx_l4_csum_errors++; + bnapi->cp_ring.sw_stats->rx.rx_l4_csum_errors++; } } @@ -2259,7 +2259,7 @@ next_rx_no_prod_no_len: return rc; oom_next_rx: - cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + cpr->sw_stats->rx.rx_oom_discards += 1; rc = -ENOMEM; goto next_rx; } @@ -2308,7 +2308,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp, } rc = bnxt_rx_pkt(bp, cpr, raw_cons, event); if (rc && rc != -EBUSY) - cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1; + cpr->sw_stats->rx.rx_netpoll_discards += 1; return rc; } @@ -3951,6 +3951,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp) if (rc) return rc; cpr2->bnapi = bnapi; + cpr2->sw_stats = cpr->sw_stats; cpr2->cp_idx = k; if (!k && rx) { bp->rx_ring[i].rx_cpr = cpr2; @@ -4792,6 +4793,9 @@ static void bnxt_free_ring_stats(struct bnxt *bp) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; bnxt_free_stats_mem(bp, &cpr->stats); + + kfree(cpr->sw_stats); + cpr->sw_stats = NULL; } } @@ -4806,6 +4810,10 @@ static int bnxt_alloc_stats(struct bnxt *bp) struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + cpr->sw_stats = kzalloc(sizeof(*cpr->sw_stats), GFP_KERNEL); + if (!cpr->sw_stats) + return -ENOMEM; + cpr->stats.len = size; rc = bnxt_alloc_stats_mem(bp, &cpr->stats, !i); if (rc) @@ -10811,9 +10819,9 @@ static void bnxt_disable_napi(struct bnxt *bp) cpr = &bnapi->cp_ring; if (bnapi->tx_fault) - cpr->sw_stats.tx.tx_resets++; + cpr->sw_stats->tx.tx_resets++; if (bnapi->in_reset) - cpr->sw_stats.rx.rx_resets++; + cpr->sw_stats->rx.rx_resets++; napi_disable(&bnapi->napi); if (bnapi->rx_ring) cancel_work_sync(&cpr->dim.work); @@ -12338,8 +12346,8 @@ static void bnxt_get_ring_stats(struct bnxt *bp, stats->tx_dropped += BNXT_GET_RING_STATS64(sw, tx_error_pkts); stats->rx_dropped += - cpr->sw_stats.rx.rx_netpoll_discards + - cpr->sw_stats.rx.rx_oom_discards; + cpr->sw_stats->rx.rx_netpoll_discards + + cpr->sw_stats->rx.rx_oom_discards; } } @@ -12406,7 +12414,7 @@ static void bnxt_get_one_ring_err_stats(struct bnxt *bp, struct bnxt_total_ring_err_stats *stats, struct bnxt_cp_ring_info *cpr) { - struct bnxt_sw_stats *sw_stats = &cpr->sw_stats; + struct bnxt_sw_stats *sw_stats = cpr->sw_stats; u64 *hw_stats = cpr->stats.sw_stats; stats->rx_total_l4_csum_errors += sw_stats->rx.rx_l4_csum_errors; @@ -13249,7 +13257,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) rxr->bnapi->in_reset = false; bnxt_alloc_one_rx_ring(bp, i); cpr = &rxr->bnapi->cp_ring; - cpr->sw_stats.rx.rx_resets++; + cpr->sw_stats->rx.rx_resets++; if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); @@ -13461,7 +13469,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) bnxt_dbg_hwrm_ring_info_get(bp, 
DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL, fw_ring_id, &val[0], &val[1]); - cpr->sw_stats.cmn.missed_irqs++; + cpr->sw_stats->cmn.missed_irqs++; } } } @@ -14769,7 +14777,7 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, stats->bytes += BNXT_GET_RING_STATS64(sw, rx_mcast_bytes); stats->bytes += BNXT_GET_RING_STATS64(sw, rx_bcast_bytes); - stats->alloc_fail = cpr->sw_stats.rx.rx_oom_discards; + stats->alloc_fail = cpr->sw_stats->rx.rx_oom_discards; } static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index ad57ef051798..631b0039d72b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1152,7 +1152,7 @@ struct bnxt_cp_ring_info { struct bnxt_stats_mem stats; u32 hw_stats_ctx_id; - struct bnxt_sw_stats sw_stats; + struct bnxt_sw_stats *sw_stats; struct bnxt_ring_struct cp_ring_struct; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 68444234b268..6de3cfcea90f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -631,13 +631,13 @@ static void bnxt_get_ethtool_stats(struct net_device *dev, buf[j] = sw_stats[k]; skip_tpa_ring_stats: - sw = (u64 *)&cpr->sw_stats.rx; + sw = (u64 *)&cpr->sw_stats->rx; if (is_rx_ring(bp, i)) { for (k = 0; k < NUM_RING_RX_SW_STATS; j++, k++) buf[j] = sw[k]; } - sw = (u64 *)&cpr->sw_stats.cmn; + sw = (u64 *)&cpr->sw_stats->cmn; for (k = 0; k < NUM_RING_CMN_SW_STATS; j++, k++) buf[j] = sw[k]; } -- cgit v1.2.3 From 895621f1c81695da7660fe909173e9f98619e89c Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 30 Apr 2024 17:30:52 -0700 Subject: bnxt_en: Don't support offline self test when RoCE driver is loaded Offline self test is a very disruptive operation for RoCE and requires all active QPs to be destroyed. With a large number of QPs, it can take a long time to destroy all the QPs and can timeout. Do not allow ethtool offline self test if the RoCE driver is registered on the device. 
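The guard added at the top of bnxt_self_test() makes this explicit:

| if (etest->flags & ETH_TEST_FL_OFFLINE &&
|     bnxt_ulp_registered(bp->edev)) {
| 	etest->flags |= ETH_TEST_FL_FAILED;
| 	netdev_warn(dev, "Offline tests cannot be run with RoCE driver loaded\n");
| 	return;
| }

Online self tests remain allowed while the RoCE driver is loaded.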
Reviewed-by: Selvin Thyparampil Xavier Reviewed-by: Vikas Gupta Reviewed-by: Pavan Chebbi Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6de3cfcea90f..8763f8a01457 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4820,6 +4820,14 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!bp->num_tests || !BNXT_PF(bp)) return; + + if (etest->flags & ETH_TEST_FL_OFFLINE && + bnxt_ulp_registered(bp->edev)) { + etest->flags |= ETH_TEST_FL_FAILED; + netdev_warn(dev, "Offline tests cannot be run with RoCE driver loaded\n"); + return; + } + memset(buf, 0, sizeof(u64) * bp->num_tests); if (!netif_running(dev)) { etest->flags |= ETH_TEST_FL_FAILED; @@ -4850,7 +4858,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!offline) { bnxt_run_fw_tests(bp, test_mask, &test_results); } else { - bnxt_ulp_stop(bp); bnxt_close_nic(bp, true, false); bnxt_run_fw_tests(bp, test_mask, &test_results); @@ -4861,7 +4868,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (rc) { bnxt_hwrm_mac_loopback(bp, false); etest->flags |= ETH_TEST_FL_FAILED; - bnxt_ulp_start(bp, rc); return; } if (bnxt_run_loopback(bp)) @@ -4888,7 +4894,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); rc = bnxt_open_nic(bp, true, true); - bnxt_ulp_start(bp, rc); } if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; -- cgit v1.2.3 From f79d7a9f1c9d0cba9ae3d0cfd743c277d78dcf45 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 30 Apr 2024 17:30:53 -0700 Subject: bnxt_en: Don't call ULP_STOP/ULP_START during L2 reset There is no need to call ULP_STOP and ULP_START before and after the L2 reset in bnxt_reset_task(). This L2 reset is done after detecting TX timeout, RX ring errors, or VF config changes. The L2 reset does not affect RoCE since the firmware is not reset and the backing store is left alone. 
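With the ULP notifications dropped, the silent and non-silent paths in bnxt_reset_task() collapse into a single sequence:

| if (netif_running(bp->dev)) {
| 	bnxt_close_nic(bp, !silent, false);
| 	bnxt_open_nic(bp, !silent, false);
| }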
Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 98bff01b89ff..a4ab1b09b27b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13089,17 +13089,8 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) if (!silent) bnxt_dbg_dump_states(bp); if (netif_running(bp->dev)) { - int rc; - - if (silent) { - bnxt_close_nic(bp, false, false); - bnxt_open_nic(bp, false, false); - } else { - bnxt_ulp_stop(bp); - bnxt_close_nic(bp, true, false); - rc = bnxt_open_nic(bp, true, false); - bnxt_ulp_start(bp, rc); - } + bnxt_close_nic(bp, !silent, false); + bnxt_open_nic(bp, !silent, false); } } -- cgit v1.2.3 From de21ec442d411b17a2386cb6683acd18b047506d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 30 Apr 2024 17:30:54 -0700 Subject: bnxt_en: Add a mutex to synchronize ULP operations The current scheme relies heavily on the RTNL lock for all ULP operations between the L2 and the RoCE driver. Add a new en_dev_lock mutex so that the asynchronous ULP_STOP and ULP_START operations can be serialized with bnxt_register_dev() and bnxt_unregister_dev() calls without relying on the RTNL lock. The next patch will remove the RTNL lock from the ULP_STOP and ULP_START calls. Reviewed-by: Selvin Thyparampil Xavier Reviewed-by: Vikas Gupta Reviewed-by: Pavan Chebbi Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 20 +++++++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 3 +++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index d8927838f1cf..ba3fa1c2e5d9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -113,6 +113,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, int rc = 0; rtnl_lock(); + mutex_lock(&edev->en_dev_lock); if (!bp->irq_tbl) { rc = -ENODEV; goto exit; @@ -136,6 +137,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, bnxt_fill_msix_vecs(bp, bp->edev->msix_entries); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; exit: + mutex_unlock(&edev->en_dev_lock); rtnl_unlock(); return rc; } @@ -150,6 +152,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) ulp = edev->ulp_tbl; rtnl_lock(); + mutex_lock(&edev->en_dev_lock); if (ulp->msix_requested) edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; edev->ulp_tbl->msix_requested = 0; @@ -165,6 +168,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) msleep(100); i++; } + mutex_unlock(&edev->en_dev_lock); rtnl_unlock(); return; } @@ -223,6 +227,12 @@ void bnxt_ulp_stop(struct bnxt *bp) if (!edev) return; + mutex_lock(&edev->en_dev_lock); + if (!bnxt_ulp_registered(edev)) { + mutex_unlock(&edev->en_dev_lock); + return; + } + edev->flags |= BNXT_EN_FLAG_ULP_STOPPED; if (aux_priv) { struct auxiliary_device *adev; @@ -237,6 +247,7 @@ void bnxt_ulp_stop(struct bnxt *bp) 
adrv->suspend(adev, pm); } } + mutex_unlock(&edev->en_dev_lock); } void bnxt_ulp_start(struct bnxt *bp, int err) @@ -252,6 +263,12 @@ void bnxt_ulp_start(struct bnxt *bp, int err) if (err) return; + mutex_lock(&edev->en_dev_lock); + if (!bnxt_ulp_registered(edev)) { + mutex_unlock(&edev->en_dev_lock); + return; + } + if (edev->ulp_tbl->msix_requested) bnxt_fill_msix_vecs(bp, edev->msix_entries); @@ -267,7 +284,7 @@ void bnxt_ulp_start(struct bnxt *bp, int err) adrv->resume(adev); } } - + mutex_unlock(&edev->en_dev_lock); } void bnxt_ulp_irq_stop(struct bnxt *bp) @@ -383,6 +400,7 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->l2_db_size = bp->db_size; edev->l2_db_size_nc = bp->db_size; edev->l2_db_offset = bp->db_offset; + mutex_init(&edev->en_dev_lock); if (bp->flags & BNXT_FLAG_ROCEV1_CAP) edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index b86baf901a5d..4eafe6ec0abf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -88,6 +88,9 @@ struct bnxt_en_dev { u16 ulp_num_msix_vec; u16 ulp_num_ctxs; + + /* serialize ulp operations */ + struct mutex en_dev_lock; }; static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) -- cgit v1.2.3 From 3c163f35bd50314d4e70ed9e83e1d8d83c473325 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 30 Apr 2024 17:30:55 -0700 Subject: bnxt_en: Optimize recovery path ULP locking in the driver In the error recovery path (AER, firmware recovery, etc), the driver notifies the RoCE driver via ULP_STOP before the reset and via ULP_START after the reset, all under RTNL_LOCK. The RoCE driver can take a long time if there are a lot of QPs to destroy, so it is not ideal to hold the global RTNL lock. Rely on the new en_dev_lock mutex instead for ULP_STOP and ULP_START. For the most part, we move the ULP_STOP call before we take the RTNL lock and move the ULP_START after RTNL unlock. Note that SRIOV re-enablement must be done after ULP_START or RoCE on the VFs will not resume properly after reset. The one scenario in bnxt_hwrm_if_change() where the RTNL lock is already taken in the .ndo_open() context requires the ULP restart to be deferred to the bnxt_sp_task() workqueue. 
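bnxt_suspend() and bnxt_resume() show the resulting pattern of moving the ULP stop/start calls outside the RTNL critical section:

| bnxt_ulp_stop(bp);
|
| rtnl_lock();
| ...
| rtnl_unlock();
| bnxt_ulp_start(bp, rc);
| if (!rc)
| 	bnxt_reenable_sriov(bp);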
Reviewed-by: Selvin Thyparampil Xavier Reviewed-by: Vikas Gupta Reviewed-by: Pavan Chebbi Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 62 ++++++++++++++--------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 7 ++- 3 files changed, 44 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a4ab1b09b27b..ccab7817c036 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11556,7 +11556,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (fw_reset) { set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) - bnxt_ulp_stop(bp); + bnxt_ulp_irq_stop(bp); bnxt_free_ctx_mem(bp); bnxt_dcb_free(bp); rc = bnxt_fw_init_one(bp); @@ -12111,10 +12111,9 @@ static int bnxt_open(struct net_device *dev) bnxt_hwrm_if_change(bp, false); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { - if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { - bnxt_ulp_start(bp, 0); - bnxt_reenable_sriov(bp); - } + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + bnxt_queue_sp_work(bp, + BNXT_RESTART_ULP_SP_EVENT); } } @@ -13270,7 +13269,6 @@ static void bnxt_fw_fatal_close(struct bnxt *bp) static void bnxt_fw_reset_close(struct bnxt *bp) { - bnxt_ulp_stop(bp); /* When firmware is in fatal state, quiesce device and disable * bus master to prevent any potential bad DMAs before freeing * kernel memory. @@ -13351,6 +13349,7 @@ void bnxt_fw_exception(struct bnxt *bp) { netdev_warn(bp->dev, "Detected firmware fatal condition, initiating reset\n"); set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + bnxt_ulp_stop(bp); bnxt_rtnl_lock_sp(bp); bnxt_force_fw_reset(bp); bnxt_rtnl_unlock_sp(bp); @@ -13382,6 +13381,7 @@ static int bnxt_get_registered_vfs(struct bnxt *bp) void bnxt_fw_reset(struct bnxt *bp) { + bnxt_ulp_stop(bp); bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state) && !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -13506,6 +13506,12 @@ static void bnxt_fw_echo_reply(struct bnxt *bp) hwrm_req_send(bp, req); } +static void bnxt_ulp_restart(struct bnxt *bp) +{ + bnxt_ulp_stop(bp); + bnxt_ulp_start(bp, 0); +} + static void bnxt_sp_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, sp_task); @@ -13517,6 +13523,11 @@ static void bnxt_sp_task(struct work_struct *work) return; } + if (test_and_clear_bit(BNXT_RESTART_ULP_SP_EVENT, &bp->sp_event)) { + bnxt_ulp_restart(bp); + bnxt_reenable_sriov(bp); + } + if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event)) bnxt_cfg_rx_mode(bp); @@ -13973,10 +13984,8 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp) static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) { clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { - bnxt_ulp_start(bp, rc); + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) bnxt_dl_health_fw_status_update(bp, false); - } bp->fw_reset_state = 0; dev_close(bp->dev); } @@ -14007,7 +14016,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = 0; netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n", n); - return; + goto 
ulp_start; } bnxt_queue_fw_reset_work(bp, HZ / 10); return; @@ -14017,7 +14026,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); - return; + goto ulp_start; } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { @@ -14110,7 +14119,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); - return; + goto ulp_start; } if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && @@ -14122,10 +14131,6 @@ static void bnxt_fw_reset_task(struct work_struct *work) /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_ulp_start(bp, 0); - bnxt_reenable_sriov(bp); - bnxt_vf_reps_alloc(bp); - bnxt_vf_reps_open(bp); bnxt_ptp_reapply_pps(bp); clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); if (test_and_clear_bit(BNXT_STATE_RECOVER, &bp->state)) { @@ -14133,6 +14138,12 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, true); } rtnl_unlock(); + bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); + rtnl_lock(); + bnxt_vf_reps_alloc(bp); + bnxt_vf_reps_open(bp); + rtnl_unlock(); break; } return; @@ -14148,6 +14159,8 @@ fw_reset_abort: rtnl_lock(); bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); +ulp_start: + bnxt_ulp_start(bp, rc); } static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) @@ -15534,8 +15547,9 @@ static int bnxt_suspend(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; - rtnl_lock(); bnxt_ulp_stop(bp); + + rtnl_lock(); if (netif_running(dev)) { netif_device_detach(dev); rc = bnxt_close(dev); @@ -15590,10 +15604,10 @@ static int bnxt_resume(struct device *device) } resume_exit: + rtnl_unlock(); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); - rtnl_unlock(); return rc; } @@ -15623,11 +15637,11 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, netdev_info(netdev, "PCI I/O error detected\n"); + bnxt_ulp_stop(bp); + rtnl_lock(); netif_device_detach(netdev); - bnxt_ulp_stop(bp); - if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_err(bp->dev, "Firmware reset already in progress\n"); abort = true; @@ -15763,13 +15777,13 @@ static void bnxt_io_resume(struct pci_dev *pdev) if (!err && netif_running(netdev)) err = bnxt_open(netdev); - bnxt_ulp_start(bp, err); - if (!err) { - bnxt_reenable_sriov(bp); + if (!err) netif_device_attach(netdev); - } rtnl_unlock(); + bnxt_ulp_start(bp, err); + if (!err) + bnxt_reenable_sriov(bp); } static const struct pci_error_handlers bnxt_err_handler = { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 631b0039d72b..1e15a25b77c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2440,6 +2440,7 @@ struct bnxt { #define BNXT_LINK_CFG_CHANGE_SP_EVENT 21 #define BNXT_THERMAL_THRESHOLD_SP_EVENT 22 #define BNXT_FW_ECHO_REQUEST_SP_EVENT 23 +#define BNXT_RESTART_ULP_SP_EVENT 24 struct delayed_work fw_reset_task; int fw_reset_state; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index d9ea6fa23923..4cb0fabf977e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -437,18 +437,20 @@ static int 
bnxt_dl_reload_down(struct devlink *dl, bool netns_change, switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { + bnxt_ulp_stop(bp); rtnl_lock(); if (bnxt_sriov_cfg(bp)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported while VFs are allocated or being configured"); rtnl_unlock(); + bnxt_ulp_start(bp, 0); return -EOPNOTSUPP; } if (bp->dev->reg_state == NETREG_UNREGISTERED) { rtnl_unlock(); + bnxt_ulp_start(bp, 0); return -ENODEV; } - bnxt_ulp_stop(bp); if (netif_running(bp->dev)) bnxt_close_nic(bp, true, true); bnxt_vf_reps_free(bp); @@ -516,7 +518,6 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti bnxt_vf_reps_alloc(bp); if (netif_running(bp->dev)) rc = bnxt_open_nic(bp, true, true); - bnxt_ulp_start(bp, rc); if (!rc) { bnxt_reenable_sriov(bp); bnxt_ptp_reapply_pps(bp); @@ -570,6 +571,8 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti dev_close(bp->dev); } rtnl_unlock(); + if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + bnxt_ulp_start(bp, rc); return rc; } -- cgit v1.2.3 From 54d0b84f400290df93127ef9a562745464908ffb Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 30 Apr 2024 17:30:56 -0700 Subject: bnxt_en: Add VF PCI ID for 5760X (P7) chips No driver logic changes are required to support the VFs, so just add the VF PCI ID. Reviewed-by: Selvin Thyparampil Xavier Signed-off-by: Ajit Khaparde Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240501003056.100607-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ccab7817c036..78ba383d2fa0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -137,6 +137,7 @@ static const struct { [NETXTREME_E_VF_HV] = { "Broadcom NetXtreme-E Virtual Function for Hyper-V" }, [NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" }, + [NETXTREME_E_P7_VF] = { "Broadcom BCM5760X Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -211,6 +212,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF }, { PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x1819), .driver_data = NETXTREME_E_P7_VF }, { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } @@ -294,7 +296,7 @@ static bool bnxt_vf_pciid(enum board_idx idx) return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF || - idx == NETXTREME_E_P5_VF_HV); + idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 1e15a25b77c7..34d82aaa49ed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2013,6 +2013,7 @@ enum board_idx { NETXTREME_E_VF_HV, NETXTREME_E_P5_VF, 
NETXTREME_E_P5_VF_HV, + NETXTREME_E_P7_VF, }; struct bnxt { -- cgit v1.2.3 From 3d549c382297f4d7646e327c817107a88d3f931b Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Mon, 22 Apr 2024 15:27:27 +0000 Subject: net/mlx5e: flower: check for unsupported control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use flow_rule_is_supp_control_flags() to reject filters with unsupported control flags. In case any unsupported control flags are masked, flow_rule_is_supp_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Remove FLOW_DIS_FIRST_FRAG specific error message, and treat it as any other unsupported control flag. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Jianbo Liu Reviewed-by: Simon Horman Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/20240422152728.175677-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index aeb32cb27182..30673292e15f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2801,12 +2801,6 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; - /* the HW doesn't support frag first/later */ - if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { - NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); - return -EOPNOTSUPP; - } - if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, @@ -2819,6 +2813,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, else *match_level = MLX5_MATCH_L3; } + + if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT, + match.mask->flags, extack)) + return -EOPNOTSUPP; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { -- cgit v1.2.3 From 5bfadc573711a1e68d05d25e10ae747385c4c253 Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 2 May 2024 13:37:57 -0700 Subject: bnxt: fix bnxt_get_avail_msix() returning negative values Current net-next/main does not boot for older chipsets e.g. Stratus. Sample dmesg: [ 11.368315] bnxt_en 0000:02:00.0 (unnamed net_device) (uninitialized): Able to reserve only 0 out of 9 requested RX rings [ 11.390181] bnxt_en 0000:02:00.0 (unnamed net_device) (uninitialized): Unable to reserve tx rings [ 11.438780] bnxt_en 0000:02:00.0 (unnamed net_device) (uninitialized): 2nd rings reservation failed. [ 11.487559] bnxt_en 0000:02:00.0 (unnamed net_device) (uninitialized): Not enough rings available. [ 11.506012] bnxt_en 0000:02:00.0: probe with driver bnxt_en failed with error -12 This is caused by bnxt_get_avail_msix() returning a negative value for these chipsets not using the new resource manager i.e. !BNXT_NEW_RM. This in turn causes hwr.cp in __bnxt_reserve_rings() to be set to 0. In the current call stack, __bnxt_reserve_rings() is called from bnxt_set_dflt_rings() before bnxt_init_int_mode(). Therefore, bp->total_irqs is always 0 and for !BNXT_NEW_RM bnxt_get_avail_msix() always returns a negative number. Historically, MSIX vectors were requested by the RoCE driver during run-time and bnxt_get_avail_msix() was used for this purpose. 
Today, RoCE MSIX vectors are statically allocated. bnxt_get_avail_msix() should only be called for the BNXT_NEW_RM() case to reserve the MSIX ahead of time for RoCE use. bnxt_get_avail_msix() is also be simplified to handle the BNXT_NEW_RM() case only. Fixes: d630624ebd70 ("bnxt_en: Utilize ulp client resources if RoCE is not registered") Signed-off-by: David Wei Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240502203757.3761827-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++------------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 - 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 78ba383d2fa0..0d1ed6e5dfbd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7469,6 +7469,8 @@ static bool bnxt_rings_ok(struct bnxt *bp, struct bnxt_hw_rings *hwr) hwr->stat && (hwr->cp_p5 || !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)); } +static int bnxt_get_avail_msix(struct bnxt *bp, int num); + static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; @@ -7481,7 +7483,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_need_reserve_rings(bp)) return 0; - if (!bnxt_ulp_registered(bp->edev)) { + if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(bp->edev)) { ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); if (!ulp_msix) bnxt_set_ulp_stat_ctxs(bp, 0); @@ -10484,19 +10486,10 @@ unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp) return bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_func_stat_ctxs(bp); } -int bnxt_get_avail_msix(struct bnxt *bp, int num) +static int bnxt_get_avail_msix(struct bnxt *bp, int num) { - int max_cp = bnxt_get_max_func_cp_rings(bp); int max_irq = bnxt_get_max_func_irqs(bp); int total_req = bp->cp_nr_rings + num; - int max_idx, avail_msix; - - max_idx = bp->total_irqs; - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) - max_idx = min_t(int, bp->total_irqs, max_cp); - avail_msix = max_idx - bp->cp_nr_rings; - if (!BNXT_NEW_RM(bp) || avail_msix >= num) - return avail_msix; if (max_irq < total_req) { num = max_irq - bp->cp_nr_rings; @@ -10629,7 +10622,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) if (!bnxt_need_reserve_rings(bp)) return 0; - if (!bnxt_ulp_registered(bp->edev)) { + if (BNXT_NEW_RM(bp) && !bnxt_ulp_registered(bp->edev)) { int ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); if (ulp_msix > bp->ulp_num_msix_want) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 34d82aaa49ed..656ab81c0272 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2738,7 +2738,6 @@ unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp); -int bnxt_get_avail_msix(struct bnxt *bp, int num); int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp); -- cgit v1.2.3 From 173e7622ccb3f46834bd4176ed363f435e142942 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 2 May 2024 10:54:22 -0700 Subject: Revert "net: mirror skb frag ref/unref helpers" This reverts commit a580ea994fd37f4105028f5a85c38ff6508a2b25. 
This revert is to resolve Dragos's report of page_pool leak here: https://lore.kernel.org/lkml/20240424165646.1625690-2-dtatulea@nvidia.com/ The reverted patch interacts very badly with commit 2cc3aeb5eccc ("skbuff: Fix a potential race while recycling page_pool packets"). The reverted commit hopes that the pp_recycle + is_pp_page variables do not change between the skb_frag_ref and skb_frag_unref operation. If such a change occurs, the skb_frag_ref/unref will not operate on the same reference type. In the case of Dragos's report, the grabbed ref was a pp ref, but the unref was a page ref, because the pp_recycle setting on the skb was changed. Attempting to fix this issue on the fly is risky. Lets revert and I hope to reland this with better understanding and testing to ensure we don't regress some edge case while streamlining skb reffing. Fixes: a580ea994fd3 ("net: mirror skb frag ref/unref helpers") Reported-by: Dragos Tatulea Signed-off-by: Mina Almasry Link: https://lore.kernel.org/r/20240502175423.2456544-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 2 +- drivers/net/ethernet/sun/cassini.c | 4 +- drivers/net/veth.c | 2 +- include/linux/skbuff_ref.h | 39 ++---------------- net/core/skbuff.c | 46 ++++++++++++++++++++-- net/tls/tls_device_fallback.c | 2 +- 6 files changed, 51 insertions(+), 44 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 3832c2e8ea5a..e8e460a92e0e 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1659,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, for (i = 0; i < record->num_frags; i++) { skb_shinfo(nskb)->frags[i] = record->frags[i]; /* increase the frag ref count */ - __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle); + __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); } skb_shinfo(nskb)->nr_frags = record->num_frags; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index f058e154a3bc..8f1f43dbb76d 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2000,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen - swivel; skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); - __skb_frag_ref(frag, skb->pp_recycle); + __skb_frag_ref(frag); /* any more data? 
*/ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2024,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, frag++; skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); - __skb_frag_ref(frag, skb->pp_recycle); + __skb_frag_ref(frag); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0b0293629329..426e68a95067 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -717,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp) return; for (i = 0; i < sinfo->nr_frags; i++) - __skb_frag_ref(&sinfo->frags[i], false); + __skb_frag_ref(&sinfo->frags[i]); } static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 4dcdbe9fbc5f..11f0a4063403 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -8,47 +8,16 @@ #define _LINUX_SKBUFF_REF_H #include -#include - -#ifdef CONFIG_PAGE_POOL -static inline bool is_pp_page(struct page *page) -{ - return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; -} - -static inline bool napi_pp_get_page(struct page *page) -{ - page = compound_head(page); - - if (!is_pp_page(page)) - return false; - - page_pool_ref_page(page); - return true; -} -#endif - -static inline void skb_page_ref(struct page *page, bool recycle) -{ -#ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_get_page(page)) - return; -#endif - get_page(page); -} /** * __skb_frag_ref - take an addition reference on a paged fragment. * @frag: the paged fragment - * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb. * - * Takes an additional reference on the paged fragment @frag. Obtains the - * correct reference count depending on whether skb->pp_recycle is set and - * whether the frag is a page pool frag. + * Takes an additional reference on the paged fragment @frag. */ -static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle) +static inline void __skb_frag_ref(skb_frag_t *frag) { - skb_page_ref(skb_frag_page(frag), recycle); + get_page(skb_frag_page(frag)); } /** @@ -60,7 +29,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle) */ static inline void skb_frag_ref(struct sk_buff *skb, int f) { - __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); + __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } bool napi_pp_put_page(struct page *page); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 28cd640a6ea9..466999a7515e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -904,6 +904,11 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static bool is_pp_page(struct page *page) +{ + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; +} + int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -1025,6 +1030,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) return napi_pp_put_page(virt_to_page(data)); } +/** + * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb + * @skb: page pool aware skb + * + * Increase the fragment reference count (pp_ref_count) of a skb. This is + * intended to gain fragment references only for page pool aware skbs, + * i.e. when skb->pp_recycle is true, and not for fragments in a + * non-pp-recycling skb. It has a fallback to increase references on normal + * pages, as page pool aware skbs may also have normal page fragments. 
+ */ +static int skb_pp_frag_ref(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + struct page *head_page; + int i; + + if (!skb->pp_recycle) + return -EINVAL; + + shinfo = skb_shinfo(skb); + + for (i = 0; i < shinfo->nr_frags; i++) { + head_page = compound_head(skb_frag_page(&shinfo->frags[i])); + if (likely(is_pp_page(head_page))) + page_pool_ref_page(head_page); + else + page_ref_inc(head_page); + } + return 0; +} + static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) @@ -4160,7 +4196,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - __skb_frag_ref(fragfrom, skb->pp_recycle); + __skb_frag_ref(fragfrom); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); @@ -4810,7 +4846,7 @@ normal: } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; - __skb_frag_ref(nskb_frag, nskb->pp_recycle); + __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { @@ -5941,8 +5977,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < from_shinfo->nr_frags; i++) - __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle); + if (skb_pp_frag_ref(from)) { + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); + } to->truesize += delta; to->len += len; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 9237dded4467..f9e3d3d90dcf 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -278,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in, for (i = 0; remaining > 0; i++) { skb_frag_t *frag = &record->frags[i]; - __skb_frag_ref(frag, false); + __skb_frag_ref(frag); sg_set_page(sg_in + i, skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); -- cgit v1.2.3 From dcecfcf21bd139f90f94f630ae86122a98267685 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:41 +0000 Subject: gve: Make the GQ RX free queue funcs idempotent Although this is not fixing any existing double free bug, making these functions idempotent allows for a simpler implementation of future ndo hooks that act on a single queue. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 9b56e89c4f43..0a3f88170411 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -30,6 +30,9 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, u32 slots = rx->mask + 1; int i; + if (!rx->data.page_info) + return; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], @@ -69,20 +72,26 @@ static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, int idx = rx->q_num; size_t bytes; - bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; - dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); - rx->desc.desc_ring = NULL; + if (rx->desc.desc_ring) { + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + } - dma_free_coherent(dev, sizeof(*rx->q_resources), - rx->q_resources, rx->q_resources_bus); - rx->q_resources = NULL; + if (rx->q_resources) { + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + } gve_rx_unfill_pages(priv, rx, cfg); - bytes = sizeof(*rx->data.data_ring) * slots; - dma_free_coherent(dev, bytes, rx->data.data_ring, - rx->data.data_bus); - rx->data.data_ring = NULL; + if (rx->data.data_ring) { + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + } kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; -- cgit v1.2.3 From 242f30fe692e5f6407a27a3e9b64f23c9c9b5c83 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:42 +0000 Subject: gve: Add adminq funcs to add/remove a single Rx queue This allows for implementing future ndo hooks that act on a single queue. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_adminq.c | 79 ++++++++++++++++++---------- drivers/net/ethernet/google/gve/gve_adminq.h | 2 + 2 files changed, 54 insertions(+), 27 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index b2b619aa2310..3df8243680d9 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -630,14 +630,15 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que return gve_adminq_kick_and_wait(priv); } -static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd, + u32 queue_index) { struct gve_rx_ring *rx = &priv->rx[queue_index]; - union gve_adminq_command cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); - cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) { .queue_id = cpu_to_be32(queue_index), .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), @@ -648,13 +649,13 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id; - cmd.create_rx_queue.rx_desc_ring_addr = + cmd->create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus), - cmd.create_rx_queue.rx_data_ring_addr = + cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus), - cmd.create_rx_queue.index = cpu_to_be32(queue_index); - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); + cmd->create_rx_queue.index = cpu_to_be32(queue_index); + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); } else { u32 qpl_id = 0; @@ -662,25 +663,40 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; else qpl_id = rx->dqo.qpl->id; - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.rx_desc_ring_addr = + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->dqo.complq.bus); - cmd.create_rx_queue.rx_data_ring_addr = + cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->dqo.bufq.bus); - cmd.create_rx_queue.packet_buffer_size = + cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(priv->data_buffer_size_dqo); - cmd.create_rx_queue.rx_buff_ring_size = + cmd->create_rx_queue.rx_buff_ring_size = cpu_to_be16(priv->rx_desc_cnt); - cmd.create_rx_queue.enable_rsc = + cmd->create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); if (priv->header_split_enabled) - cmd.create_rx_queue.header_buffer_size = + cmd->create_rx_queue.header_buffer_size = cpu_to_be16(priv->header_buf_size); } +} + +static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); return gve_adminq_issue_cmd(priv, &cmd); } +/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */ +int 
gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) { int err; @@ -727,22 +743,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu return gve_adminq_kick_and_wait(priv); } +static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, + u32 queue_index) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; +} + static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; - int err; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); - cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { - .queue_id = cpu_to_be32(queue_index), - }; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_issue_cmd(priv, &cmd); +} - err = gve_adminq_issue_cmd(priv, &cmd); - if (err) - return err; +/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */ +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; - return 0; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); } int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index beedf2353847..e64f0dbe744d 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -451,7 +451,9 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues); +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); -- cgit v1.2.3 From 5abc37bdcbc5a32f7c5cb21f5c0334cbf0e81752 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:43 +0000 Subject: gve: Make gve_turn(up|down) ignore stopped queues Currently the queues are either all live or all dead, toggling from one state to the other via the ndo open and stop hooks. The future addition of single-queue ndo hooks changes this, and thus gve_turnup and gve_turndown should evolve to account for a state where some queues are live and some aren't. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 61039e3dd2bb..469a914c71d6 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1937,12 +1937,16 @@ static void gve_turndown(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; napi_disable(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; napi_disable(&block->napi); } @@ -1965,6 +1969,9 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); @@ -1977,6 +1984,9 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); -- cgit v1.2.3 From 864616d97a4505a719d5f67c29d31776038d39ef Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:44 +0000 Subject: gve: Make gve_turnup work for nonempty queues gVNIC has a requirement that all queues have to be quiesced before any queue is operated on (created or destroyed). To enable the implementation of future ndo hooks that work on a single queue, we need to evolve gve_turnup to account for queues already having some unprocessed descriptors in the ring. Say rxq 4 is being stopped and started via the queue api. Due to gve's requirement of quiescence, queues 0 through 3 are not processing their rings while queue 4 is being toggled. Once they are made live, these queues need to be poked to cause them to check their rings for descriptors that were written during their brief period of quiescence. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 469a914c71d6..ef902b72b9a9 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1979,6 +1979,13 @@ static void gve_turnup(struct gve_priv *priv) gve_set_itr_coalesce_usecs_dqo(priv, block, priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. 
+ */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -1994,6 +2001,13 @@ static void gve_turnup(struct gve_priv *priv) gve_set_itr_coalesce_usecs_dqo(priv, block, priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } gve_set_napi_enabled(priv); -- cgit v1.2.3 From 9a5e0776d11f1ac9c740a6e24ff0e0facb6e3ddb Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:45 +0000 Subject: gve: Avoid rescheduling napi if on wrong cpu In order to make possible the implementation of per-queue ndo hooks, gve_turnup was changed in a previous patch to account for queues already having some unprocessed descriptors: it does a one-off napi_schdule to handle them. If conditions of consistent high traffic persist in the immediate aftermath of this, the poll routine for a queue can be "stuck" on the cpu on which the ndo hooks ran, instead of the cpu its irq has affinity with. This situation is exacerbated by the fact that the ndo hooks for all the queues are invoked on the same cpu, potentially causing all the napi poll routines to be residing on the same cpu. A self correcting mechanism in the poll method itself solves this problem. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve.h | 1 + drivers/net/ethernet/google/gve/gve_main.c | 33 ++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 53b5244dc7bc..f27a6d5fbecf 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -610,6 +610,7 @@ struct gve_notify_block { struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ + u32 irq; }; /* Tracks allowed and current queue settings */ diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index ef902b72b9a9..79b7a677ec0b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -253,6 +254,18 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; @@ -322,8 +335,21 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. 
+ */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. @@ -428,6 +454,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } + block->irq = priv->msix_vectors[msix_idx].vector; irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); block->irq_db_index = &priv->irq_db_indices[i].index; @@ -441,6 +468,7 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; @@ -474,6 +502,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); kvfree(priv->ntfy_blocks); -- cgit v1.2.3 From 770f52d5a0ed9ea3e3b8f04927eac520cab97935 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:46 +0000 Subject: gve: Reset Rx ring state in the ring-stop funcs This does not fix any existing bug. In anticipation of the ndo queue api hooks that alloc/free/start/stop a single Rx queue, the already existing per-queue stop functions are being made more robust. Specifically for this use case: rx_queue_n.stop() + rx_queue_n.start() Note that this is not the use case being used in devmem tcp (the first place these new ndo hooks would be used). There the usecase is: new_queue.alloc() + old_queue.stop() + new_queue.start() + old_queue.free() Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve_rx.c | 48 ++++++++++--- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 102 ++++++++++++++++++++++----- 2 files changed, 120 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 0a3f88170411..79c1d8f63621 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -53,6 +53,41 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, rx->data.page_info = NULL; } +static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +{ + ctx->skb_head = NULL; + ctx->skb_tail = NULL; + ctx->total_size = 0; + ctx->frag_cnt = 0; + ctx->drop_pkt = false; +} + +static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx) +{ + rx->desc.seqno = 1; + rx->cnt = 0; + gve_rx_ctx_clear(&rx->ctx); +} + +static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + const u32 slots = priv->rx_desc_cnt; + size_t size; + + /* Reset desc ring */ + if (rx->desc.desc_ring) { + size = slots * sizeof(rx->desc.desc_ring[0]); + memset(rx->desc.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + gve_rx_init_ring_state_gqi(rx); +} + void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -62,6 +97,7 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_gqi(priv, idx); } static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, @@ -222,15 +258,6 @@ alloc_err_rda: return err; } -static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) -{ - ctx->skb_head = NULL; - ctx->skb_tail = NULL; - ctx->total_size = 0; - ctx->frag_cnt = 0; - ctx->drop_pkt = false; -} - void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -309,9 +336,8 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, err = -ENOMEM; goto abort_with_q_resources; } - rx->cnt = 0; rx->db_threshold = slots / 2; - rx->desc.seqno = 1; + gve_rx_init_ring_state_gqi(rx); rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 53fd2d87233f..7c2980c212f4 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -211,6 +211,82 @@ static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) } } +static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx, + const u32 buffer_queue_slots, + const u32 completion_queue_slots) +{ + int i; + + /* Set buffer queue state */ + rx->dqo.bufq.mask = buffer_queue_slots - 1; + rx->dqo.bufq.head = 0; + rx->dqo.bufq.tail = 0; + + /* Set completion queue state */ + rx->dqo.complq.num_free_slots = completion_queue_slots; + rx->dqo.complq.mask = completion_queue_slots - 1; + rx->dqo.complq.cur_gen_bit = 0; + rx->dqo.complq.head = 0; + + /* Set RX SKB context */ + rx->ctx.skb_head = NULL; + rx->ctx.skb_tail = NULL; + + /* Set up linked list of buffer IDs */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states - 1; i++) + rx->dqo.buf_states[i].next = i + 1; + rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; + } + + rx->dqo.free_buf_states = 0; + 
rx->dqo.recycled_buf_states.head = -1; + rx->dqo.recycled_buf_states.tail = -1; + rx->dqo.used_buf_states.head = -1; + rx->dqo.used_buf_states.tail = -1; +} + +static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + size_t size; + int i; + + const u32 buffer_queue_slots = priv->rx_desc_cnt; + const u32 completion_queue_slots = priv->rx_desc_cnt; + + /* Reset buffer queue */ + if (rx->dqo.bufq.desc_ring) { + size = sizeof(rx->dqo.bufq.desc_ring[0]) * + buffer_queue_slots; + memset(rx->dqo.bufq.desc_ring, 0, size); + } + + /* Reset completion queue */ + if (rx->dqo.complq.desc_ring) { + size = sizeof(rx->dqo.complq.desc_ring[0]) * + completion_queue_slots; + memset(rx->dqo.complq.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + /* Reset buf states */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states; i++) { + struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; + + if (bs->page_info.page) + gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + } + } + + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); +} + void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -220,6 +296,7 @@ void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_dqo(priv, idx); } static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, @@ -273,10 +350,10 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx, + const u32 buf_count) { struct device *hdev = &priv->pdev->dev; - int buf_count = rx->dqo.bufq.mask + 1; rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, &rx->dqo.hdr_bufs.addr, GFP_KERNEL); @@ -301,7 +378,6 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, { struct device *hdev = &priv->pdev->dev; size_t size; - int i; const u32 buffer_queue_slots = cfg->ring_size; const u32 completion_queue_slots = cfg->ring_size; @@ -311,11 +387,6 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, memset(rx, 0, sizeof(*rx)); rx->gve = priv; rx->q_num = idx; - rx->dqo.bufq.mask = buffer_queue_slots - 1; - rx->dqo.complq.num_free_slots = completion_queue_slots; - rx->dqo.complq.mask = completion_queue_slots - 1; - rx->ctx.skb_head = NULL; - rx->ctx.skb_tail = NULL; rx->dqo.num_buf_states = cfg->raw_addressing ? 
min_t(s16, S16_MAX, buffer_queue_slots * 4) : @@ -328,19 +399,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, /* Allocate header buffers for header-split */ if (cfg->enable_header_split) - if (gve_rx_alloc_hdr_bufs(priv, rx)) + if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots)) goto err; - /* Set up linked list of buffer IDs */ - for (i = 0; i < rx->dqo.num_buf_states - 1; i++) - rx->dqo.buf_states[i].next = i + 1; - - rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; - rx->dqo.recycled_buf_states.head = -1; - rx->dqo.recycled_buf_states.tail = -1; - rx->dqo.used_buf_states.head = -1; - rx->dqo.used_buf_states.tail = -1; - /* Allocate RX completion queue */ size = sizeof(rx->dqo.complq.desc_ring[0]) * completion_queue_slots; @@ -368,6 +429,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->q_resources) goto err; + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); + return 0; err: -- cgit v1.2.3 From af9bcf910b1f86244f39e15e701b2dc564b469a6 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:47 +0000 Subject: gve: Account for stopped queues when reading NIC stats We now account for the fact that the NIC might send us stats for a subset of queues. Without this change, gve_get_ethtool_stats might make an invalid access on the priv->stats_report->stats array. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_ethtool.c | 41 +++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index bd7632eed776..a606670a9a39 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -8,6 +8,7 @@ #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" +#include "gve_utils.h" static void gve_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) @@ -165,6 +166,8 @@ gve_get_ethtool_stats(struct net_device *netdev, struct stats *report_stats; int *rx_qid_to_stats_idx; int *tx_qid_to_stats_idx; + int num_stopped_rxqs = 0; + int num_stopped_txqs = 0; struct gve_priv *priv; bool skip_nic_stats; unsigned int start; @@ -181,12 +184,23 @@ gve_get_ethtool_stats(struct net_device *netdev, sizeof(int), GFP_KERNEL); if (!rx_qid_to_stats_idx) return; + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + rx_qid_to_stats_idx[ring] = -1; + if (!gve_rx_was_added_to_block(priv, ring)) + num_stopped_rxqs++; + } tx_qid_to_stats_idx = kmalloc_array(num_tx_queues, sizeof(int), GFP_KERNEL); if (!tx_qid_to_stats_idx) { kfree(rx_qid_to_stats_idx); return; } + for (ring = 0; ring < num_tx_queues; ring++) { + tx_qid_to_stats_idx[ring] = -1; + if (!gve_tx_was_added_to_block(priv, ring)) + num_stopped_txqs++; + } + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, @@ -260,7 +274,13 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For rx cross-reporting stats, start from nic rx stats in report */ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; - max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + /* The boundary between driver stats and NIC stats shifts if there are + * stopped 
queues. + */ + base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + + NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; + max_stats_idx = NIC_RX_STATS_REPORT_NUM * + (priv->rx_cfg.num_queues - num_stopped_rxqs) + base_stats_idx; /* Preprocess the stats report for rx, map queue id to start index */ skip_nic_stats = false; @@ -274,6 +294,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) { + net_err_ratelimited("Invalid rxq id in NIC stats\n"); + continue; + } rx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk RX rings */ @@ -308,11 +332,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->rx_copybreak_pkt; data[i++] = rx->rx_copied_pkt; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = rx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC rx stats */ i += NIC_RX_STATS_REPORT_NUM; } else { - stats_idx = rx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); @@ -338,7 +362,8 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For tx cross-reporting stats, start from nic tx stats in report */ base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues + + max_stats_idx = NIC_TX_STATS_REPORT_NUM * + (num_tx_queues - num_stopped_txqs) + max_stats_idx; /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; @@ -352,6 +377,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= num_tx_queues) { + net_err_ratelimited("Invalid txq id in NIC stats\n"); + continue; + } tx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk TX rings */ @@ -383,11 +412,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = tx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC tx stats */ i += NIC_TX_STATS_REPORT_NUM; } else { - stats_idx = tx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); -- cgit v1.2.3 From ee24284e2a1075966f0f2c5499c59b7d2b9bc2de Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:48 +0000 Subject: gve: Alloc and free QPLs with the rings Every tx and rx ring has its own queue-page-list (QPL) that serves as the bounce buffer. Previously we were allocating QPLs for all queues before the queues themselves were allocated and later associating a QPL with a queue. This is avoidable complexity: it is much more natural for each queue to allocate and free its own QPL. Moreover, the advent of new queue-manipulating ndo hooks make it hard to keep things as is: we would need to transfer a QPL from an old queue to a new queue, and that is unpleasant. Tested-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 30 +-- drivers/net/ethernet/google/gve/gve_ethtool.c | 7 +- drivers/net/ethernet/google/gve/gve_main.c | 343 +++++++------------------- drivers/net/ethernet/google/gve/gve_rx.c | 43 ++-- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 23 +- drivers/net/ethernet/google/gve/gve_tx.c | 33 ++- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 23 +- 7 files changed, 171 insertions(+), 331 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index f27a6d5fbecf..9e0a433c991c 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -638,26 +638,10 @@ struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; -/* Parameters for allocating queue page lists */ -struct gve_qpls_alloc_cfg { - struct gve_queue_config *tx_cfg; - struct gve_queue_config *rx_cfg; - - u16 num_xdp_queues; - bool raw_addressing; - bool is_gqi; - - /* Allocated resources are returned here */ - struct gve_queue_page_list *qpls; -}; - /* Parameters for allocating resources for tx queues */ struct gve_tx_alloc_rings_cfg { struct gve_queue_config *qcfg; - /* qpls must already be allocated */ - struct gve_queue_page_list *qpls; - u16 ring_size; u16 start_idx; u16 num_rings; @@ -673,9 +657,6 @@ struct gve_rx_alloc_rings_cfg { struct gve_queue_config *qcfg; struct gve_queue_config *qcfg_tx; - /* qpls must already be allocated */ - struct gve_queue_page_list *qpls; - u16 ring_size; u16 packet_buffer_size; bool raw_addressing; @@ -701,7 +682,6 @@ struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ - struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ dma_addr_t irq_db_indices_bus; @@ -1025,7 +1005,6 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) return priv->tx_cfg.max_queues + rx_qid; } -/* Returns the index into priv->qpls where a certain rx queue's QPL resides */ static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid) { return tx_cfg->max_queues + rx_qid; @@ -1036,7 +1015,6 @@ static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) return gve_tx_qpl_id(priv, 0); } -/* Returns the index into priv->qpls where the first rx queue's QPL resides */ static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg) { return gve_get_rx_qpl_id(tx_cfg, 0); @@ -1090,6 +1068,12 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); +/* qpls */ +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages); +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -1126,11 +1110,9 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); void gve_get_curr_alloc_cfgs(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct 
gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_config(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index a606670a9a39..156b7e128b53 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -538,20 +538,17 @@ static int gve_adjust_ring_sizes(struct gve_priv *priv, { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; int err; /* get current queue configuration */ - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); /* copy over the new ring_size from ethtool */ tx_alloc_cfg.ring_size = new_tx_desc_cnt; rx_alloc_cfg.ring_size = new_rx_desc_cnt; if (netif_running(priv->dev)) { - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) return err; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 79b7a677ec0b..e22ac764ec4f 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -611,37 +611,36 @@ static void gve_teardown_device_resources(struct gve_priv *priv) gve_clear_device_resources_ok(priv); } -static int gve_unregister_qpl(struct gve_priv *priv, u32 i) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { int err; - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); + if (!qpl) + return 0; + + err = gve_adminq_unregister_page_list(priv, qpl->id); if (err) { netif_err(priv, drv, priv->dev, "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + qpl->id); return err; } - priv->num_registered_pages -= priv->qpls[i].num_entries; + priv->num_registered_pages -= qpl->num_entries; return 0; } -static int gve_register_qpl(struct gve_priv *priv, u32 i) +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - int num_rx_qpls; int pages; int err; - /* Rx QPLs succeed Tx QPLs in the priv->qpls array. 
*/ - num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); - if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) { - netif_err(priv, drv, priv->dev, - "Cannot register nonexisting QPL at index %d\n", i); - return -EINVAL; - } + if (!qpl) + return 0; - pages = priv->qpls[i].num_entries; + pages = qpl->num_entries; if (pages + priv->num_registered_pages > priv->max_registered_pages) { netif_err(priv, drv, priv->dev, @@ -651,14 +650,11 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i) return -EINVAL; } - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); + err = gve_adminq_register_page_list(priv, qpl); if (err) { netif_err(priv, drv, priv->dev, "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + qpl->id); return err; } @@ -666,6 +662,26 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i) return 0; } +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; +} + +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; +} + static int gve_register_xdp_qpls(struct gve_priv *priv) { int start_id; @@ -674,7 +690,7 @@ static int gve_register_xdp_qpls(struct gve_priv *priv) start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_register_qpl(priv, i); + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean up */ if (err) return err; @@ -685,7 +701,6 @@ static int gve_register_xdp_qpls(struct gve_priv *priv) static int gve_register_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; - int start_id; int err; int i; @@ -694,15 +709,13 @@ static int gve_register_qpls(struct gve_priv *priv) num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { - err = gve_register_qpl(priv, i); + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); if (err) return err; } - /* there might be a gap between the tx and rx qpl ids */ - start_id = gve_rx_start_qpl_id(&priv->tx_cfg); for (i = 0; i < num_rx_qpls; i++) { - err = gve_register_qpl(priv, start_id + i); + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); if (err) return err; } @@ -718,7 +731,7 @@ static int gve_unregister_xdp_qpls(struct gve_priv *priv) start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_unregister_qpl(priv, i); + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean */ if (err) return err; @@ -729,7 +742,6 @@ static int gve_unregister_xdp_qpls(struct gve_priv *priv) static int gve_unregister_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; - int start_id; int err; int i; @@ -738,15 +750,14 @@ static int gve_unregister_qpls(struct gve_priv *priv) num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { - err = gve_unregister_qpl(priv, i); + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean */ if (err) return err; } - start_id = gve_rx_start_qpl_id(&priv->tx_cfg); for (i = 0; i < num_rx_qpls; i++) { - err = 
gve_unregister_qpl(priv, start_id + i); + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean */ if (err) return err; @@ -857,7 +868,6 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, { cfg->qcfg = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); - cfg->qpls = priv->qpls; cfg->ring_size = priv->tx_desc_cnt; cfg->start_idx = 0; cfg->num_rings = gve_num_tx_queues(priv); @@ -914,9 +924,9 @@ static int gve_alloc_xdp_rings(struct gve_priv *priv) return 0; } -static int gve_alloc_rings(struct gve_priv *priv, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; @@ -1002,9 +1012,9 @@ static void gve_free_xdp_rings(struct gve_priv *priv) } } -static void gve_free_rings(struct gve_priv *priv, - struct gve_tx_alloc_rings_cfg *tx_cfg, - struct gve_rx_alloc_rings_cfg *rx_cfg) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { if (gve_is_gqi(priv)) { gve_tx_free_rings_gqi(priv, tx_cfg); @@ -1033,35 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, - struct gve_queue_page_list *qpl, - u32 id, int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { + struct gve_queue_page_list *qpl; int err; int i; + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; + qpl->id = id; qpl->num_entries = 0; qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->pages) - return -ENOMEM; + goto abort; + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id), GFP_KERNEL); - /* caller handles clean up */ if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -1073,14 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, - struct gve_queue_page_list *qpl, - int id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { int i; - if (!qpl->pages) + if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -1093,109 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, free_pages: kvfree(qpl->pages); qpl->pages = NULL; -} - -static void gve_free_n_qpls(struct gve_priv *priv, - struct gve_queue_page_list *qpls, - int start_id, - int num_qpls) -{ - int i; - - for (i = start_id; i < start_id + num_qpls; i++) - gve_free_queue_page_list(priv, &qpls[i], i); -} - -static int gve_alloc_n_qpls(struct gve_priv *priv, - struct gve_queue_page_list *qpls, - int page_count, - int start_id, - int num_qpls) -{ - int err; - int i; - - for (i = start_id; i < start_id + num_qpls; i++) { - err = gve_alloc_queue_page_list(priv, &qpls[i], i, 
page_count); - if (err) - goto free_qpls; - } - - return 0; - -free_qpls: - /* Must include the failing QPL too for gve_alloc_queue_page_list fails - * without cleaning up. - */ - gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1); - return err; -} - -static int gve_alloc_qpls(struct gve_priv *priv, struct gve_qpls_alloc_cfg *cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) -{ - int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues; - int rx_start_id, tx_num_qpls, rx_num_qpls; - struct gve_queue_page_list *qpls; - u32 page_count; - int err; - - if (cfg->raw_addressing) - return 0; - - qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL); - if (!qpls) - return -ENOMEM; - - /* Allocate TX QPLs */ - page_count = priv->tx_pages_per_qpl; - tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues, - gve_is_qpl(priv)); - err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls); - if (err) - goto free_qpl_array; - - /* Allocate RX QPLs */ - rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg); - /* For GQI_QPL number of pages allocated have 1:1 relationship with - * number of descriptors. For DQO, number of pages required are - * more than descriptors (because of out of order completions). - * Set it to twice the number of descriptors. - */ - if (cfg->is_gqi) - page_count = rx_alloc_cfg->ring_size; - else - page_count = gve_get_rx_pages_per_qpl_dqo(rx_alloc_cfg->ring_size); - rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv)); - err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls); - if (err) - goto free_tx_qpls; - - cfg->qpls = qpls; - return 0; - -free_tx_qpls: - gve_free_n_qpls(priv, qpls, 0, tx_num_qpls); -free_qpl_array: - kvfree(qpls); - return err; -} - -static void gve_free_qpls(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) -{ - int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues; - struct gve_queue_page_list *qpls = cfg->qpls; - int i; - - if (!qpls) - return; - - for (i = 0; i < max_queues; i++) - gve_free_queue_page_list(priv, &qpls[i], i); - - kvfree(qpls); - cfg->qpls = NULL; +free_qpl: + kvfree(qpl); } /* Use this to schedule a reset when the device is capable of continuing @@ -1299,17 +1216,6 @@ static void gve_drain_page_cache(struct gve_priv *priv) page_frag_cache_drain(&priv->rx[i].page_cache); } -static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) -{ - cfg->raw_addressing = !gve_is_qpl(priv); - cfg->is_gqi = gve_is_gqi(priv); - cfg->num_xdp_queues = priv->num_xdp_queues; - cfg->tx_cfg = &priv->tx_cfg; - cfg->rx_cfg = &priv->rx_cfg; - cfg->qpls = priv->qpls; -} - static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg) { @@ -1317,7 +1223,6 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->qcfg_tx = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->enable_header_split = priv->header_split_enabled; - cfg->qpls = priv->qpls; cfg->ring_size = priv->rx_desc_cnt; cfg->packet_buffer_size = gve_is_gqi(priv) ? 
GVE_DEFAULT_RX_BUFFER_SIZE : @@ -1326,11 +1231,9 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, } void gve_get_curr_alloc_cfgs(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { - gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg); gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); } @@ -1362,53 +1265,13 @@ static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) } } -static void gve_queues_mem_free(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) -{ - gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg); - gve_free_qpls(priv, qpls_alloc_cfg); -} - -static int gve_queues_mem_alloc(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) -{ - int err; - - err = gve_alloc_qpls(priv, qpls_alloc_cfg, rx_alloc_cfg); - if (err) { - netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n"); - return err; - } - tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls; - rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls; - err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg); - if (err) { - netif_err(priv, drv, priv->dev, "Failed to alloc rings\n"); - goto free_qpls; - } - - return 0; - -free_qpls: - gve_free_qpls(priv, qpls_alloc_cfg); - return err; -} - static void gve_queues_mem_remove(struct gve_priv *priv) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - gve_queues_mem_free(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - priv->qpls = NULL; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); priv->tx = NULL; priv->rx = NULL; } @@ -1417,7 +1280,6 @@ static void gve_queues_mem_remove(struct gve_priv *priv) * No memory is allocated. Passed-in memory is freed on errors. */ static int gve_queues_start(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { @@ -1425,7 +1287,6 @@ static int gve_queues_start(struct gve_priv *priv, int err; /* Record new resources into priv */ - priv->qpls = qpls_alloc_cfg->qpls; priv->tx = tx_alloc_cfg->tx; priv->rx = rx_alloc_cfg->rx; @@ -1497,23 +1358,19 @@ static int gve_open(struct net_device *dev) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) return err; /* No need to free on error: ownership of resources is lost after * calling gve_queues_start. 
*/ - err = gve_queues_start(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) return err; @@ -1572,11 +1429,8 @@ static int gve_close(struct net_device *dev) static int gve_remove_xdp_queues(struct gve_priv *priv) { - int qpl_start_id; int err; - qpl_start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_destroy_xdp_rings(priv); if (err) return err; @@ -1588,27 +1442,19 @@ static int gve_remove_xdp_queues(struct gve_priv *priv) gve_unreg_xdp_info(priv); gve_free_xdp_rings(priv); - gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv)); priv->num_xdp_queues = 0; return 0; } static int gve_add_xdp_queues(struct gve_priv *priv) { - int start_id; int err; priv->num_xdp_queues = priv->rx_cfg.num_queues; - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl, - start_id, gve_num_xdp_qpls(priv)); - if (err) - goto err; - err = gve_alloc_xdp_rings(priv); if (err) - goto free_xdp_qpls; + goto err; err = gve_reg_xdp_info(priv, priv->dev); if (err) @@ -1626,8 +1472,6 @@ static int gve_add_xdp_queues(struct gve_priv *priv) free_xdp_rings: gve_free_xdp_rings(priv); -free_xdp_qpls: - gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv)); err: priv->num_xdp_queues = 0; return err; @@ -1878,15 +1722,13 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } int gve_adjust_config(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; /* Allocate resources for the new confiugration */ - err = gve_queues_mem_alloc(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to alloc new queues"); @@ -1898,14 +1740,12 @@ int gve_adjust_config(struct gve_priv *priv, if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to close old queues"); - gve_queues_mem_free(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); return err; } /* Bring the device back up again with the new resources. */ - err = gve_queues_start(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); @@ -1925,23 +1765,18 @@ int gve_adjust_queues(struct gve_priv *priv, { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); /* Relay the new config from ethtool */ - qpls_alloc_cfg.tx_cfg = &new_tx_config; tx_alloc_cfg.qcfg = &new_tx_config; rx_alloc_cfg.qcfg_tx = &new_tx_config; - qpls_alloc_cfg.rx_cfg = &new_rx_config; rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; if (netif_carrier_ok(priv->dev)) { - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } /* Set the config for the next up. 
*/ @@ -2106,7 +1941,6 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; bool enable_hdr_split; int err = 0; @@ -2126,15 +1960,13 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) if (enable_hdr_split == priv->header_split_enabled) return 0; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); rx_alloc_cfg.enable_header_split = enable_hdr_split; rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); if (netif_running(priv->dev)) - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2144,18 +1976,15 @@ static int gve_set_features(struct net_device *netdev, const netdev_features_t orig_features = netdev->features; struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; if (netif_carrier_ok(netdev)) { - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) { /* Revert the change on error. */ netdev->features = orig_features; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 79c1d8f63621..70aca2f2c8c3 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -41,7 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, for (i = 0; i < slots; i++) page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - rx->data.qpl = NULL; for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { page_ref_sub(rx->qpl_copy_pool[i].page, @@ -107,6 +106,7 @@ static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, u32 slots = rx->mask + 1; int idx = rx->q_num; size_t bytes; + u32 qpl_id; if (rx->desc.desc_ring) { bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; @@ -132,6 +132,12 @@ static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; + if (rx->data.qpl) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx); + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } @@ -188,12 +194,6 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, if (!rx->data.page_info) return -ENOMEM; - if (!rx->data.raw_addressing) { - u32 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); - - rx->data.qpl = &cfg->qpls[qpl_id]; - } - for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; @@ -246,8 +246,6 @@ alloc_err_qpl: page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - rx->data.qpl = NULL; - return err; alloc_err_rda: @@ -274,6 +272,8 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, struct device *hdev = &priv->pdev->dev; u32 
slots = cfg->ring_size; int filled_pages; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; int err; @@ -306,10 +306,22 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, goto abort_with_slots; } + if (!rx->data.raw_addressing) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = cfg->ring_size; + + rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!rx->data.qpl) { + err = -ENOMEM; + goto abort_with_copy_pool; + } + } + filled_pages = gve_rx_prefill_pages(rx, cfg); if (filled_pages < 0) { err = -ENOMEM; - goto abort_with_copy_pool; + goto abort_with_qpl; } rx->fill_cnt = filled_pages; /* Ensure data ring slots (packet buffers) are visible. */ @@ -350,6 +362,11 @@ abort_with_q_resources: rx->q_resources = NULL; abort_filled: gve_rx_unfill_pages(priv, rx, cfg); +abort_with_qpl: + if (!rx->data.raw_addressing) { + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } abort_with_copy_pool: kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; @@ -368,12 +385,6 @@ int gve_rx_alloc_rings_gqi(struct gve_priv *priv, int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 7c2980c212f4..4ea8ecc3b2d5 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -307,6 +307,7 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, size_t buffer_queue_slots; int idx = rx->q_num; size_t size; + u32 qpl_id; int i; completion_queue_slots = rx->dqo.complq.mask + 1; @@ -325,7 +326,11 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, gve_free_page_dqo(priv, bs, !rx->dqo.qpl); } - rx->dqo.qpl = NULL; + if (rx->dqo.qpl) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id); + rx->dqo.qpl = NULL; + } if (rx->dqo.bufq.desc_ring) { size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots; @@ -377,7 +382,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; + int qpl_page_cnt; size_t size; + u32 qpl_id; const u32 buffer_queue_slots = cfg->ring_size; const u32 completion_queue_slots = cfg->ring_size; @@ -418,9 +425,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (!cfg->raw_addressing) { - u32 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); - rx->dqo.qpl = &cfg->qpls[qpl_id]; + rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!rx->dqo.qpl) + goto err; rx->dqo.next_qpl_page_idx = 0; } @@ -454,12 +465,6 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv, int err; int i; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index f805700d67e7..24a64ec1073e 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ 
b/drivers/net/ethernet/google/gve/gve_tx.c @@ -216,6 +216,7 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, struct device *hdev = &priv->pdev->dev; int idx = tx->q_num; size_t bytes; + u32 qpl_id; u32 slots; slots = tx->mask + 1; @@ -223,8 +224,12 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, tx->q_resources, tx->q_resources_bus); tx->q_resources = NULL; - if (!tx->raw_addressing) { - gve_tx_fifo_release(priv, &tx->tx_fifo); + if (tx->tx_fifo.qpl) { + if (tx->tx_fifo.base) + gve_tx_fifo_release(priv, &tx->tx_fifo); + + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); tx->tx_fifo.qpl = NULL; } @@ -255,6 +260,8 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; /* Make sure everything is zeroed to start */ @@ -279,12 +286,17 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, tx->raw_addressing = cfg->raw_addressing; tx->dev = hdev; if (!tx->raw_addressing) { - u32 qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!tx->tx_fifo.qpl) + goto abort_with_desc; - tx->tx_fifo.qpl = &cfg->qpls[qpl_id]; /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) - goto abort_with_desc; + goto abort_with_qpl; } tx->q_resources = @@ -300,6 +312,11 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); +abort_with_qpl: + if (!tx->raw_addressing) { + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); + tx->tx_fifo.qpl = NULL; + } abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; @@ -316,12 +333,6 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv, int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 3d825e406c4b..fe1b26a4d736 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -209,6 +209,7 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct device *hdev = &priv->pdev->dev; int idx = tx->q_num; size_t bytes; + u32 qpl_id; if (tx->q_resources) { dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -236,7 +237,11 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, kvfree(tx->dqo.tx_qpl_buf_next); tx->dqo.tx_qpl_buf_next = NULL; - tx->dqo.qpl = NULL; + if (tx->dqo.qpl) { + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id); + tx->dqo.qpl = NULL; + } netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); } @@ -282,7 +287,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, { struct device *hdev = &priv->pdev->dev; int num_pending_packets; + int qpl_page_cnt; size_t bytes; + u32 qpl_id; int i; memset(tx, 0, sizeof(*tx)); @@ -349,9 +356,13 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (!cfg->raw_addressing) 
{ - u32 qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; - tx->dqo.qpl = &cfg->qpls[qpl_id]; + tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!tx->dqo.qpl) + goto err; if (gve_tx_qpl_buf_init(tx)) goto err; @@ -371,12 +382,6 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv, int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); -- cgit v1.2.3 From b1de3c0df7abc41dc41862c0b08386411f2799d7 Mon Sep 17 00:00:00 2001 From: Rengarajan S Date: Thu, 2 May 2024 10:33:00 +0530 Subject: net: microchip: lan743x: Reduce PTP timeout on HW failure The PTP_CMD_CTL is a self clearing register which controls the PTP clock values. In the current implementation driver waits for a duration of 20 sec in case of HW failure to clear the PTP_CMD_CTL register bit. This timeout of 20 sec is very long to recognize a HW failure, as it is typically cleared in one clock(<16ns). Hence reducing the timeout to 1 sec would be sufficient to conclude if there is any HW failure observed. The usleep_range will sleep somewhere between 1 msec to 20 msec for each iteration. By setting the PTP_CMD_CTL_TIMEOUT_CNT to 50 the max timeout is extended to 1 sec. Signed-off-by: Rengarajan S Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240502050300.38689-1-rengarajan.s@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_ptp.c | 2 +- drivers/net/ethernet/microchip/lan743x_ptp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 80d9680b3830..dcea6652d56d 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -58,7 +58,7 @@ int lan743x_gpio_init(struct lan743x_adapter *adapter) static void lan743x_ptp_wait_till_cmd_done(struct lan743x_adapter *adapter, u32 bit_mask) { - int timeout = 1000; + int timeout = PTP_CMD_CTL_TIMEOUT_CNT; u32 data = 0; while (timeout && diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h index e26d4eff7133..0d29914cd460 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.h +++ b/drivers/net/ethernet/microchip/lan743x_ptp.h @@ -21,6 +21,7 @@ #define LAN743X_PTP_N_EXTTS 4 #define LAN743X_PTP_N_PPS 0 #define PCI11X1X_PTP_IO_MAX_CHANNELS 8 +#define PTP_CMD_CTL_TIMEOUT_CNT 50 struct lan743x_adapter; -- cgit v1.2.3 From 4fd104018cb87188143f39e8bc0bf0a9fd32e53e Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 28 Mar 2024 21:07:07 -0400 Subject: ice: add additional E830 device ids Add support for additional E830 device ids which are supported by the driver: - 0x12D5: Intel(R) Ethernet Controller E830-C for backplane - 0x12D8: Intel(R) Ethernet Controller E830-C for QSFP - 0x12DA: Intel(R) Ethernet Controller E830-C for SFP - 0x12DC: Intel(R) Ethernet Controller E830-XXV for backplane - 0x12DD: Intel(R) Ethernet Controller E830-XXV for QSFP - 0x12DE: Intel(R) Ethernet Controller E830-XXV for SFP Reviewed-by: Wojciech Drewek Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony 
Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 6 ++++++ drivers/net/ethernet/intel/ice/ice_devids.h | 12 ++++++++++++ drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++ 3 files changed, 24 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index f4bc8723ffa9..847d22f9fc6c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -164,6 +164,12 @@ static int ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E830_QSFP56: case ICE_DEV_ID_E830_SFP: case ICE_DEV_ID_E830_SFP_DD: + case ICE_DEV_ID_E830C_BACKPLANE: + case ICE_DEV_ID_E830_XXV_BACKPLANE: + case ICE_DEV_ID_E830C_QSFP: + case ICE_DEV_ID_E830_XXV_QSFP: + case ICE_DEV_ID_E830C_SFP: + case ICE_DEV_ID_E830_XXV_SFP: hw->mac_type = ICE_MAC_E830; break; default: diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 9dfae9bce758..c37b2b450b02 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -24,6 +24,18 @@ #define ICE_DEV_ID_E830_SFP 0x12D3 /* Intel(R) Ethernet Controller E830-C for SFP-DD */ #define ICE_DEV_ID_E830_SFP_DD 0x12D4 +/* Intel(R) Ethernet Controller E830-C for backplane */ +#define ICE_DEV_ID_E830C_BACKPLANE 0x12D5 +/* Intel(R) Ethernet Controller E830-C for QSFP */ +#define ICE_DEV_ID_E830C_QSFP 0x12D8 +/* Intel(R) Ethernet Controller E830-C for SFP */ +#define ICE_DEV_ID_E830C_SFP 0x12DA +/* Intel(R) Ethernet Controller E830-XXV for backplane */ +#define ICE_DEV_ID_E830_XXV_BACKPLANE 0x12DC +/* Intel(R) Ethernet Controller E830-XXV for QSFP */ +#define ICE_DEV_ID_E830_XXV_QSFP 0x12DD +/* Intel(R) Ethernet Controller E830-XXV for SFP */ +#define ICE_DEV_ID_E830_XXV_SFP 0x12DE /* Intel(R) Ethernet Controller E810-C for backplane */ #define ICE_DEV_ID_E810C_BACKPLANE 0x1591 /* Intel(R) Ethernet Controller E810-C for QSFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 06549dae4cca..4968331cb1d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5809,6 +5809,12 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_QSFP56) }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP) }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP_DD) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_SFP), }, /* required last entry */ {} }; -- cgit v1.2.3 From a8e682f03748f72e82e89f178c1838305e789bb2 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 28 Mar 2024 21:07:08 -0400 Subject: ice: update E830 device ids and comments Update existing E830 device ids and comments to align with new naming 'C' for 100G and 'CC' for 200G. 
Reviewed-by: Wojciech Drewek Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 8 ++++---- drivers/net/ethernet/intel/ice/ice_devids.h | 16 ++++++++-------- drivers/net/ethernet/intel/ice/ice_main.c | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 847d22f9fc6c..5649b257e631 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -160,10 +160,10 @@ static int ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E825C_SGMII: hw->mac_type = ICE_MAC_GENERIC_3K_E825; break; - case ICE_DEV_ID_E830_BACKPLANE: - case ICE_DEV_ID_E830_QSFP56: - case ICE_DEV_ID_E830_SFP: - case ICE_DEV_ID_E830_SFP_DD: + case ICE_DEV_ID_E830CC_BACKPLANE: + case ICE_DEV_ID_E830CC_QSFP56: + case ICE_DEV_ID_E830CC_SFP: + case ICE_DEV_ID_E830CC_SFP_DD: case ICE_DEV_ID_E830C_BACKPLANE: case ICE_DEV_ID_E830_XXV_BACKPLANE: case ICE_DEV_ID_E830C_QSFP: diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index c37b2b450b02..34fd604132f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -16,14 +16,14 @@ #define ICE_DEV_ID_E823L_1GBE 0x124F /* Intel(R) Ethernet Connection E823-L for QSFP */ #define ICE_DEV_ID_E823L_QSFP 0x151D -/* Intel(R) Ethernet Controller E830-C for backplane */ -#define ICE_DEV_ID_E830_BACKPLANE 0x12D1 -/* Intel(R) Ethernet Controller E830-C for QSFP */ -#define ICE_DEV_ID_E830_QSFP56 0x12D2 -/* Intel(R) Ethernet Controller E830-C for SFP */ -#define ICE_DEV_ID_E830_SFP 0x12D3 -/* Intel(R) Ethernet Controller E830-C for SFP-DD */ -#define ICE_DEV_ID_E830_SFP_DD 0x12D4 +/* Intel(R) Ethernet Controller E830-CC for backplane */ +#define ICE_DEV_ID_E830CC_BACKPLANE 0x12D1 +/* Intel(R) Ethernet Controller E830-CC for QSFP */ +#define ICE_DEV_ID_E830CC_QSFP56 0x12D2 +/* Intel(R) Ethernet Controller E830-CC for SFP */ +#define ICE_DEV_ID_E830CC_SFP 0x12D3 +/* Intel(R) Ethernet Controller E830-CC for SFP-DD */ +#define ICE_DEV_ID_E830CC_SFP_DD 0x12D4 /* Intel(R) Ethernet Controller E830-C for backplane */ #define ICE_DEV_ID_E830C_BACKPLANE 0x12D5 /* Intel(R) Ethernet Controller E830-C for QSFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4968331cb1d5..8513deb266eb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5805,10 +5805,10 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_BACKPLANE) }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_QSFP56) }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP) }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_SFP_DD) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_QSFP56) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP_DD) }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_BACKPLANE), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_BACKPLANE), }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_QSFP), }, -- cgit v1.2.3 From c5e6bd977d7eb840d08b9f071b7dad6721c409a0 Mon Sep 17 00:00:00 
2001 From: Marcin Szycik Date: Mon, 15 Apr 2024 10:49:07 +0200 Subject: ice: Deduplicate tc action setup ice_tc_setup_redirect_action() and ice_tc_setup_mirror_action() are almost identical, except for setting filter action. Reduce them to one function with an extra param, which handles both cases. Reviewed-by: Mateusz Polchlopek Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 56 ++++++++--------------------- 1 file changed, 15 insertions(+), 41 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 4d8f808f4898..8553c56dc95d 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -669,13 +669,19 @@ static bool ice_tc_is_dev_uplink(struct net_device *dev) return netif_is_ice(dev) || ice_is_tunnel_supported(dev); } -static int ice_tc_setup_redirect_action(struct net_device *filter_dev, - struct ice_tc_flower_fltr *fltr, - struct net_device *target_dev) +static int ice_tc_setup_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct net_device *target_dev, + enum ice_sw_fwd_act_type action) { struct ice_repr *repr; - fltr->action.fltr_act = ICE_FWD_TO_VSI; + if (action != ICE_FWD_TO_VSI && action != ICE_MIRROR_PACKET) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action to setup provided"); + return -EINVAL; + } + + fltr->action.fltr_act = action; if (ice_is_port_repr_netdev(filter_dev) && ice_is_port_repr_netdev(target_dev)) { @@ -723,41 +729,6 @@ ice_tc_setup_drop_action(struct net_device *filter_dev, return 0; } -static int ice_tc_setup_mirror_action(struct net_device *filter_dev, - struct ice_tc_flower_fltr *fltr, - struct net_device *target_dev) -{ - struct ice_repr *repr; - - fltr->action.fltr_act = ICE_MIRROR_PACKET; - - if (ice_is_port_repr_netdev(filter_dev) && - ice_is_port_repr_netdev(target_dev)) { - repr = ice_netdev_to_repr(target_dev); - - fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; - } else if (ice_is_port_repr_netdev(filter_dev) && - ice_tc_is_dev_uplink(target_dev)) { - repr = ice_netdev_to_repr(filter_dev); - - fltr->dest_vsi = repr->src_vsi->back->eswitch.uplink_vsi; - fltr->direction = ICE_ESWITCH_FLTR_EGRESS; - } else if (ice_tc_is_dev_uplink(filter_dev) && - ice_is_port_repr_netdev(target_dev)) { - repr = ice_netdev_to_repr(target_dev); - - fltr->dest_vsi = repr->src_vsi; - fltr->direction = ICE_ESWITCH_FLTR_INGRESS; - } else { - NL_SET_ERR_MSG_MOD(fltr->extack, - "Unsupported netdevice in switchdev mode"); - return -EINVAL; - } - - return 0; -} - static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, struct ice_tc_flower_fltr *fltr, struct flow_action_entry *act) @@ -773,16 +744,19 @@ static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, break; case FLOW_ACTION_REDIRECT: - err = ice_tc_setup_redirect_action(filter_dev, fltr, act->dev); + err = ice_tc_setup_action(filter_dev, fltr, + act->dev, ICE_FWD_TO_VSI); if (err) return err; break; case FLOW_ACTION_MIRRED: - err = ice_tc_setup_mirror_action(filter_dev, fltr, act->dev); + err = ice_tc_setup_action(filter_dev, fltr, + act->dev, ICE_MIRROR_PACKET); if (err) return err; + break; default: -- cgit v1.2.3 From deea427ffc0b3937c7d0bbca7f7c71711a5651d1 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Fri, 19 Apr 2024 05:11:04 -0400 Subject: ice: refactor 
struct ice_vsi_cfg_params to be inside of struct ice_vsi Refactor struct ice_vsi_cfg_params to be embedded into struct ice_vsi. Prior to that the members of the struct were scattered around ice_vsi, and were copy-pasted for purposes of reinit. Now we have struct handy, and it is easier to have something sticky in the flags field. Suggested-by: Przemek Kitszel Reviewed-by: Przemek Kitszel Reviewed-by: Vaishnavi Tipireddy Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 6 ++-- drivers/net/ethernet/intel/ice/ice.h | 14 +++++---- drivers/net/ethernet/intel/ice/ice_lib.c | 33 +++++++------------- drivers/net/ethernet/intel/ice/ice_lib.h | 39 +----------------------- drivers/net/ethernet/intel/ice/ice_main.c | 8 ++--- drivers/net/ethernet/intel/ice/ice_sriov.c | 2 +- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 8 ++--- 7 files changed, 30 insertions(+), 80 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d191c5709899..c4b69655cdf5 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1193,18 +1193,16 @@ static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, static int ice_devlink_reinit_up(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); - struct ice_vsi_cfg_params params; int err; err = ice_init_dev(pf); if (err) return err; - params = ice_vsi_to_params(vsi); - params.flags = ICE_VSI_FLAG_INIT; + vsi->flags = ICE_VSI_FLAG_INIT; rtnl_lock(); - err = ice_vsi_cfg(vsi, ¶ms); + err = ice_vsi_cfg(vsi); rtnl_unlock(); if (err) goto err_vsi_cfg; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 67a3236ab1fc..6ad8002b22e1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -331,7 +331,6 @@ struct ice_vsi { struct net_device *netdev; struct ice_sw *vsw; /* switch this VSI is on */ struct ice_pf *back; /* back pointer to PF */ - struct ice_port_info *port_info; /* back pointer to port_info */ struct ice_rx_ring **rx_rings; /* Rx ring array */ struct ice_tx_ring **tx_rings; /* Tx ring array */ struct ice_q_vector **q_vectors; /* q_vector array */ @@ -349,12 +348,9 @@ struct ice_vsi { /* tell if only dynamic irq allocation is allowed */ bool irq_dyn_alloc; - enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ - struct ice_vf *vf; /* VF associated with this VSI */ - u16 num_gfltr; u16 num_bfltr; @@ -446,12 +442,18 @@ struct ice_vsi { u8 old_numtc; u16 old_ena_tc; - struct ice_channel *ch; - /* setup back reference, to which aggregator node this VSI * corresponds to */ struct ice_agg_node *agg_node; + + struct_group_tagged(ice_vsi_cfg_params, params, + struct ice_port_info *port_info; /* back pointer to port_info */ + struct ice_channel *ch; /* VSI's channel structure, may be NULL */ + struct ice_vf *vf; /* VF associated with this VSI, may be NULL */ + u32 flags; /* VSI flags used for rebuild and configuration */ + enum ice_vsi_type type; /* the type of the VSI */ + ); } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index d06e7c82c433..5371e91f6bbb 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2227,10 +2227,8 @@ static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) /** * ice_vsi_cfg_def - configure default VSI based on the type * @vsi: pointer to VSI - * @params: the parameters to configure this VSI with */ -static int -ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) +static int ice_vsi_cfg_def(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); struct ice_pf *pf = vsi->back; @@ -2238,7 +2236,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) vsi->vsw = pf->first_sw; - ret = ice_vsi_alloc_def(vsi, params->ch); + ret = ice_vsi_alloc_def(vsi, vsi->ch); if (ret) return ret; @@ -2263,7 +2261,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) ice_vsi_set_tc_cfg(vsi); /* create the VSI */ - ret = ice_vsi_init(vsi, params->flags); + ret = ice_vsi_init(vsi, vsi->flags); if (ret) goto unroll_get_qs; @@ -2383,23 +2381,16 @@ unroll_vsi_alloc: /** * ice_vsi_cfg - configure a previously allocated VSI * @vsi: pointer to VSI - * @params: parameters used to configure this VSI */ -int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) +int ice_vsi_cfg(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; int ret; - if (WARN_ON(params->type == ICE_VSI_VF && !params->vf)) + if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; - vsi->type = params->type; - vsi->port_info = params->pi; - - /* For VSIs which don't have a connected VF, this will be NULL */ - vsi->vf = params->vf; - - ret = ice_vsi_cfg_def(vsi, params); + ret = ice_vsi_cfg_def(vsi); if (ret) return ret; @@ -2485,7 +2476,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) * a port_info structure for it. 
*/ if (WARN_ON(!(params->flags & ICE_VSI_FLAG_INIT)) || - WARN_ON(!params->pi)) + WARN_ON(!params->port_info)) return NULL; vsi = ice_vsi_alloc(pf); @@ -2494,7 +2485,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) return NULL; } - ret = ice_vsi_cfg(vsi, params); + vsi->params = *params; + ret = ice_vsi_cfg(vsi); if (ret) goto err_vsi_cfg; @@ -3041,7 +3033,6 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) */ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { - struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; int prev_num_q_vectors; struct ice_pf *pf; @@ -3050,9 +3041,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (!vsi) return -EINVAL; - params = ice_vsi_to_params(vsi); - params.flags = vsi_flags; - + vsi->flags = vsi_flags; pf = vsi->back; if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; @@ -3062,7 +3051,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) goto err_vsi_cfg; ice_vsi_decfg(vsi); - ret = ice_vsi_cfg_def(vsi, ¶ms); + ret = ice_vsi_cfg_def(vsi); if (ret) goto err_vsi_cfg; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 9cd23afe5f15..94ce8964dda6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -11,43 +11,6 @@ #define ICE_VSI_FLAG_INIT BIT(0) #define ICE_VSI_FLAG_NO_INIT 0 -/** - * struct ice_vsi_cfg_params - VSI configuration parameters - * @pi: pointer to the port_info instance for the VSI - * @ch: pointer to the channel structure for the VSI, may be NULL - * @vf: pointer to the VF associated with this VSI, may be NULL - * @type: the type of VSI to configure - * @flags: VSI flags used for rebuild and configuration - * - * Parameter structure used when configuring a new VSI. - */ -struct ice_vsi_cfg_params { - struct ice_port_info *pi; - struct ice_channel *ch; - struct ice_vf *vf; - enum ice_vsi_type type; - u32 flags; -}; - -/** - * ice_vsi_to_params - Get parameters for an existing VSI - * @vsi: the VSI to get parameters for - * - * Fill a parameter structure for reconfiguring a VSI with its current - * parameters, such as during a rebuild operation. 
- */ -static inline struct ice_vsi_cfg_params ice_vsi_to_params(struct ice_vsi *vsi) -{ - struct ice_vsi_cfg_params params = {}; - - params.pi = vsi->port_info; - params.ch = vsi->ch; - params.vf = vsi->vf; - params.type = vsi->type; - - return params; -} - const char *ice_vsi_type_str(enum ice_vsi_type vsi_type); bool ice_pf_state_is_nominal(struct ice_pf *pf); @@ -101,7 +64,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi); void ice_dis_vsi(struct ice_vsi *vsi, bool locked); int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); -int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params); +int ice_vsi_cfg(struct ice_vsi *vsi); bool ice_is_reset_in_progress(unsigned long *state); int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8513deb266eb..01c32a785ea4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3685,7 +3685,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_PF; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); @@ -3698,7 +3698,7 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_CHNL; - params.pi = pi; + params.port_info = pi; params.ch = ch; params.flags = ICE_VSI_FLAG_INIT; @@ -3719,7 +3719,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_CTRL; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); @@ -3739,7 +3739,7 @@ ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_LB; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a60dacf8942a..067712f4923f 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -225,7 +225,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_vsi *vsi; params.type = ICE_VSI_VF; - params.pi = ice_vf_get_port_info(vf); + params.port_info = ice_vf_get_port_info(vf); params.vf = vf; params.flags = ICE_VSI_FLAG_INIT; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index dab25d333bd1..48a8d462d76a 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -259,20 +259,18 @@ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) int ice_vf_reconfig_vsi(struct ice_vf *vf) { struct ice_vsi *vsi = ice_get_vf_vsi(vf); - struct ice_vsi_cfg_params params = {}; struct ice_pf *pf = vf->pf; int err; if (WARN_ON(!vsi)) return -EINVAL; - params = ice_vsi_to_params(vsi); - params.flags = ICE_VSI_FLAG_NO_INIT; + vsi->flags = ICE_VSI_FLAG_NO_INIT; ice_vsi_decfg(vsi); ice_fltr_remove_all(vsi); - err = ice_vsi_cfg(vsi, ¶ms); + err = ice_vsi_cfg(vsi); if (err) { dev_err(ice_pf_to_dev(pf), "Failed to reconfigure the VF%u's VSI, error %d\n", @@ -1243,7 +1241,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_vsi *vsi; params.type = ICE_VSI_CTRL; - params.pi = ice_vf_get_port_info(vf); + params.port_info = 
ice_vf_get_port_info(vf); params.vf = vf; params.flags = ICE_VSI_FLAG_INIT; -- cgit v1.2.3 From c93462b914dbf46b0c0256f7784cc79f7c368e45 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 1 May 2024 23:25:49 +0000 Subject: gve: Implement queue api The new netdev queue api is implemented for gve. Tested-by: Mina Almasry Reviewed-by: Mina Almasry Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Signed-off-by: Shailend Chand Link: https://lore.kernel.org/all/20240501232549.1327174-11-shailend@google.com/ Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 6 + drivers/net/ethernet/google/gve/gve_dqo.h | 6 + drivers/net/ethernet/google/gve/gve_main.c | 177 +++++++++++++++++++++++++-- drivers/net/ethernet/google/gve/gve_rx.c | 12 +- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 12 +- 5 files changed, 189 insertions(+), 24 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 9e0a433c991c..ae1e21c9b0a5 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1096,6 +1096,12 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); bool gve_rx_work_pending(struct gve_rx_ring *rx); +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); int gve_rx_alloc_rings(struct gve_priv *priv); int gve_rx_alloc_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index b81584829c40..e83773fb891f 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -44,6 +44,12 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg); void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx); void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx); +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); int gve_rx_alloc_rings_dqo(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_free_rings_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index e22ac764ec4f..cabf7d4bcecb 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include "gve.h" @@ -1238,16 +1239,28 @@ void gve_get_curr_alloc_cfgs(struct gve_priv *priv, gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); } +static void gve_rx_start_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); + else + gve_rx_start_ring_dqo(priv, i); +} + static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) { int i; - for (i = 0; i < num_rings; i++) { - if (gve_is_gqi(priv)) - gve_rx_start_ring_gqi(priv, i); - else - gve_rx_start_ring_dqo(priv, i); - } + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); +} + +static 
void gve_rx_stop_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); } static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) @@ -1257,12 +1270,8 @@ static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) if (!priv->rx) return; - for (i = 0; i < num_rings; i++) { - if (gve_is_gqi(priv)) - gve_rx_stop_ring_gqi(priv, i); - else - gve_rx_stop_ring_dqo(priv, i); - } + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); } static void gve_queues_mem_remove(struct gve_priv *priv) @@ -1877,6 +1886,15 @@ static void gve_turnup(struct gve_priv *priv) gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_notify_block *block; @@ -2323,6 +2341,140 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + 
gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. + */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2377,6 +2529,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; /* Set default and supported features. * diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 70aca2f2c8c3..acb73d4d0de6 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -99,8 +99,8 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) gve_rx_reset_ring_gqi(priv, idx); } -static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_alloc_rings_cfg *cfg) +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct device *dev = &priv->pdev->dev; u32 slots = rx->mask + 1; @@ -264,10 +264,10 @@ void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) gve_add_napi(priv, ntfy_idx, gve_napi_poll); } -static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, - struct gve_rx_alloc_rings_cfg *cfg, - struct gve_rx_ring *rx, - int idx) +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { struct device *hdev = &priv->pdev->dev; u32 slots = cfg->ring_size; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 4ea8ecc3b2d5..c1c912de59c7 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -299,8 +299,8 @@ void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) gve_rx_reset_ring_dqo(priv, idx); } -static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_alloc_rings_cfg *cfg) +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct device *hdev = &priv->pdev->dev; size_t completion_queue_slots; @@ -376,10 +376,10 @@ void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); } 
-static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, - struct gve_rx_alloc_rings_cfg *cfg, - struct gve_rx_ring *rx, - int idx) +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { struct device *hdev = &priv->pdev->dev; int qpl_page_cnt; -- cgit v1.2.3 From 6bee69422590b333de8f18193e73f68a23e12047 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 3 May 2024 12:11:58 +0100 Subject: octeontx2-pf: Treat truncation of IRQ name as an error According to GCC, the constriction of irq_name in otx2_open() may, theoretically, be truncated. This patch takes the approach of treating such a situation as an error which it detects by making use of the return value of snprintf, which is the total number of bytes, excluding the trailing '\0', that would have been written. Based on the approach taken to a similar problem in commit 54b909436ede ("rtc: fix snprintf() checking in is_rtc_hctosys()") Flagged by gcc-13 W=1 builds as: .../otx2_pf.c:1933:58: warning: 'snprintf' output may be truncated before the last format character [-Wformat-truncation=] 1933 | snprintf(irq_name, NAME_SIZE, "%s-rxtx-%d", pf->netdev->name, | ^ .../otx2_pf.c:1933:17: note: 'snprintf' output between 8 and 33 bytes into a destination of size 32 1933 | snprintf(irq_name, NAME_SIZE, "%s-rxtx-%d", pf->netdev->name, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1934 | qidx); | ~~~~~ Compile tested only. Tested-by: Geetha sowjanya Reviewed-by: Andrew Lunn Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240503-octeon2-pf-irq_name-truncation-v2-1-91099177b942@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6a44dacff508..14bccff0ee5c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1886,9 +1886,17 @@ int otx2_open(struct net_device *netdev) vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START; for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) { irq_name = &pf->hw.irq_name[vec * NAME_SIZE]; + int name_len; - snprintf(irq_name, NAME_SIZE, "%s-rxtx-%d", pf->netdev->name, - qidx); + name_len = snprintf(irq_name, NAME_SIZE, "%s-rxtx-%d", + pf->netdev->name, qidx); + if (name_len >= NAME_SIZE) { + dev_err(pf->dev, + "RVUPF%d: IRQ registration failed for CQ%d, irq name is too long\n", + rvu_get_pf(pf->pcifunc), qidx); + err = -EINVAL; + goto err_free_cints; + } err = request_irq(pci_irq_vector(pf->pdev, vec), otx2_cq_intr_handler, 0, irq_name, -- cgit v1.2.3 From 146817ec32095abb8655dadc413fbfc3016e4da4 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Fri, 3 May 2024 10:55:01 +0000 Subject: net: qede: use return from qede_parse_actions() for flow_spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In qede_flow_spec_to_rule(), when calling qede_parse_actions() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_actions() can currently fail with: * -EINVAL * -EOPNOTSUPP Commit 319a1d19471e ("flow_offload: check for basic action hw stats type") broke the implicit assumption that it could only fail with -EINVAL, by changing it to return -EOPNOTSUPP, when hardware stats are requested. 
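That -EOPNOTSUPP comes from the hw stats check near the top of
qede_parse_actions(), roughly (paraphrased here for illustration only;
this hunk is not part of the patch):

|	if (!flow_action_basic_hw_stats_check(flow_action, extack))
|		return -EOPNOTSUPP;
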
However AFAICT it's not possible to trigger qede_parse_actions() to return -EOPNOTSUPP, when called from qede_flow_spec_to_rule(), as hardware stats can't be requested by ethtool_rx_flow_rule_create(). This patch changes qede_flow_spec_to_rule() to use the actual return code from qede_parse_actions(), so it's no longer assumed that all errors are -EINVAL. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index cb6b33a228ea..d5ca4bf6dba5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1943,6 +1943,8 @@ static int qede_flow_spec_validate(struct qede_dev *edev, struct qede_arfs_tuple *t, __u32 location) { + int err; + if (location >= QEDE_RFS_MAX_FLTR) { DP_INFO(edev, "Location out-of-bounds\n"); return -EINVAL; @@ -1963,8 +1965,9 @@ static int qede_flow_spec_validate(struct qede_dev *edev, return -EINVAL; } - if (qede_parse_actions(edev, flow_action, NULL)) - return -EINVAL; + err = qede_parse_actions(edev, flow_action, NULL); + if (err) + return err; return 0; } -- cgit v1.2.3 From e5ed2f0349bf764555bcdb870a43a9bb6b1546db Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Fri, 3 May 2024 10:55:02 +0000 Subject: net: qede: use return from qede_flow_spec_validate_unused() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling qede_flow_spec_validate_unused() then the return code was only used for a non-zero check, and then -EOPNOTSUPP was returned. qede_flow_spec_validate_unused() can currently fail with: * -EOPNOTSUPP This patch changes qede_flow_spec_to_rule() to use the actual return code from qede_flow_spec_validate_unused(), so it's no longer assumed that all errors are -EOPNOTSUPP. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index d5ca4bf6dba5..07af0464eb1e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1979,10 +1979,11 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, struct ethtool_rx_flow_spec_input input = {}; struct ethtool_rx_flow_rule *flow; __be16 proto; - int err = 0; + int err; - if (qede_flow_spec_validate_unused(edev, fs)) - return -EOPNOTSUPP; + err = qede_flow_spec_validate_unused(edev, fs); + if (err) + return err; switch ((fs->flow_type & ~FLOW_EXT)) { case TCP_V4_FLOW: -- cgit v1.2.3 From c0c66eba6322a300a27cffb2295af70ad7f63d40 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Fri, 3 May 2024 10:55:03 +0000 Subject: net: qede: use return from qede_flow_parse_ports() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling qede_flow_parse_ports(), then the return code was only used for a non-zero check, and then -EINVAL was returned. 
qede_flow_parse_ports() can currently fail with: * -EINVAL This patch changes qede_flow_parse_v{4,6}_common() to use the actual return code from qede_flow_parse_ports(), so it's no longer assumed that all errors are -EINVAL. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 07af0464eb1e..ded48523c383 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1725,6 +1725,7 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *t) { struct in6_addr zero_addr, addr; + int err; memset(&zero_addr, 0, sizeof(addr)); memset(&addr, 0xff, sizeof(addr)); @@ -1746,8 +1747,9 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr)); } - if (qede_flow_parse_ports(edev, rule, t)) - return -EINVAL; + err = qede_flow_parse_ports(edev, rule, t); + if (err) + return err; return qede_set_v6_tuple_to_profile(edev, t, &zero_addr); } @@ -1756,6 +1758,8 @@ static int qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *t) { + int err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -1770,8 +1774,9 @@ qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, t->dst_ipv4 = match.key->dst; } - if (qede_flow_parse_ports(edev, rule, t)) - return -EINVAL; + err = qede_flow_parse_ports(edev, rule, t); + if (err) + return err; return qede_set_v4_tuple_to_profile(edev, t); } -- cgit v1.2.3 From 1eb2cded45b35816085c1f962933c187d970f9dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 May 2024 10:28:12 +0000 Subject: net: annotate writes on dev->mtu from ndo_change_mtu() Simon reported that ndo_change_mtu() methods were never updated to use WRITE_ONCE(dev->mtu, new_mtu) as hinted in commit 501a90c94510 ("inet: protect against too small mtu values.") We read dev->mtu without holding RTNL in many places, with READ_ONCE() annotations. 
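For illustration, the intended reader/writer pairing is roughly the
following (foo_change_mtu() is a made-up driver callback used only as a
sketch, not one of the drivers converted below):

|	/* writer: ndo_change_mtu(), runs under RTNL */
|	static int foo_change_mtu(struct net_device *dev, int new_mtu)
|	{
|		WRITE_ONCE(dev->mtu, new_mtu);
|		return 0;
|	}
|
|	/* reader: fast path, RTNL not held */
|	unsigned int mtu = READ_ONCE(dev->mtu);
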
It is time to take care of ndo_change_mtu() methods to use corresponding WRITE_ONCE() Signed-off-by: Eric Dumazet Reported-by: Simon Horman Closes: https://lore.kernel.org/netdev/20240505144608.GB67882@kernel.org/ Reviewed-by: Jacob Keller Reviewed-by: Sabrina Dubroca Reviewed-by: Simon Horman Acked-by: Shannon Nelson Link: https://lore.kernel.org/r/20240506102812.3025432-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- drivers/net/bonding/bond_main.c | 2 +- drivers/net/can/dev/dev.c | 2 +- drivers/net/can/vcan.c | 2 +- drivers/net/can/vxcan.c | 2 +- drivers/net/ethernet/agere/et131x.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/altera/altera_tse_main.c | 2 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 6 +++--- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 2 +- drivers/net/ethernet/atheros/ag71xx.c | 2 +- drivers/net/ethernet/atheros/alx/main.c | 2 +- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- drivers/net/ethernet/atheros/atlx/atl2.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 4 ++-- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 2 +- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bnad.c | 2 +- drivers/net/ethernet/cadence/macb_main.c | 2 +- drivers/net/ethernet/calxeda/xgmac.c | 2 +- drivers/net/ethernet/cavium/liquidio/lio_core.c | 2 +- drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 2 +- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- drivers/net/ethernet/cortina/gemini.c | 2 +- drivers/net/ethernet/dlink/sundance.c | 2 +- drivers/net/ethernet/faraday/ftmac100.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 2 +- drivers/net/ethernet/fungible/funeth/funeth_main.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/huawei/hinic/hinic_main.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 4 ++-- drivers/net/ethernet/ibm/ibmveth.c | 2 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/idpf/idpf_lib.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/lantiq_etop.c | 2 +- 
drivers/net/ethernet/lantiq_xrx200.c | 4 ++-- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- drivers/net/ethernet/marvell/prestera/prestera_main.c | 2 +- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- drivers/net/ethernet/marvell/skge.c | 4 ++-- drivers/net/ethernet/marvell/sky2.c | 4 ++-- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- drivers/net/ethernet/micrel/ksz884x.c | 2 +- drivers/net/ethernet/microchip/lan743x_main.c | 2 +- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 ++-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 8 ++++---- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- drivers/net/ethernet/ni/nixge.c | 2 +- drivers/net/ethernet/nvidia/forcedeth.c | 2 +- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 2 +- drivers/net/ethernet/pasemi/pasemi_mac.c | 2 +- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c | 2 +- drivers/net/ethernet/qualcomm/emac/emac.c | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- drivers/net/ethernet/realtek/8139cp.c | 4 ++-- drivers/net/ethernet/realtek/r8169_main.c | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 +- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +- drivers/net/ethernet/sfc/efx_common.c | 2 +- drivers/net/ethernet/sfc/falcon/efx.c | 2 +- drivers/net/ethernet/sfc/siena/efx_common.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/ethernet/sun/sungem.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 2 +- drivers/net/ethernet/via/via-velocity.c | 4 ++-- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 2 +- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- drivers/net/fjes/fjes_main.c | 2 +- drivers/net/geneve.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 4 ++-- drivers/net/macsec.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/net_failover.c | 2 +- drivers/net/netdevsim/netdev.c | 2 +- drivers/net/ntb_netdev.c | 4 ++-- drivers/net/slip/slip.c | 2 +- drivers/net/team/team_core.c | 2 +- drivers/net/usb/aqc111.c | 2 +- drivers/net/usb/asix_devices.c | 2 +- drivers/net/usb/ax88179_178a.c | 2 +- drivers/net/usb/cdc_ncm.c | 2 +- drivers/net/usb/lan78xx.c | 2 +- drivers/net/usb/r8152.c | 4 ++-- drivers/net/usb/usbnet.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- drivers/net/vsockmon.c | 2 +- 
drivers/net/vxlan/vxlan_core.c | 2 +- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/s390/net/ctcm_main.c | 2 +- net/8021q/vlan_dev.c | 2 +- net/batman-adv/soft-interface.c | 2 +- net/bridge/br_device.c | 2 +- net/dsa/user.c | 2 +- net/hsr/hsr_device.c | 2 +- net/hsr/hsr_main.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 4 ++-- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6_vti.c | 3 ++- net/sched/sch_teql.c | 2 +- 153 files changed, 174 insertions(+), 173 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6f2a688fccbf..32f6a28b02b1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -238,7 +238,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -265,7 +265,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) if (carrier_status) netif_carrier_on(dev); } else { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } return ret; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b3a7d60c3a5c..3b0c7758ea4f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4710,7 +4710,7 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) } } - bond_dev->mtu = new_mtu; + WRITE_ONCE(bond_dev->mtu, new_mtu); return 0; diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 3a3be5cdfc1f..83e724e0ab87 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -338,7 +338,7 @@ int can_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL_GPL(can_change_mtu); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 285635c23443..f67e85807100 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -140,7 +140,7 @@ static int vcan_change_mtu(struct net_device *dev, int new_mtu) !can_is_canxl_dev_mtu(new_mtu)) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index f7fabba707ea..9e1b7d41005f 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -135,7 +135,7 @@ static int vxcan_change_mtu(struct net_device *dev, int new_mtu) !can_is_canxl_dev_mtu(new_mtu)) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 3d9220f9c9fe..b325e0cef120 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3852,7 +3852,7 @@ static int et131x_change_mtu(struct net_device *netdev, int new_mtu) et131x_disable_txrx(netdev); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); et131x_adapter_memory_free(adapter); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index eafef84fe3be..3d8ac63132fb 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2539,7 +2539,7 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) struct ace_regs __iomem *regs = ap->regs; writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu);
if (new_mtu > ACE_STD_MTU) { if (!(ap->jumbo)) { diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 1c8763be0e4b..3c112c18ae6a 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -788,7 +788,7 @@ static int tse_change_mtu(struct net_device *dev, int new_mtu) return -EBUSY; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); netdev_update_features(dev); return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index be5acfa41ee0..28eaedaf713d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -104,7 +104,7 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) if (!ret) { netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu); update_rx_ring_mtu(adapter, new_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } else { netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", new_mtu); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index c1b5e9a94308..f64f96fa17cf 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1520,9 +1520,9 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu) if (!netif_running(dev)) { /* new_mtu will be used - * when device starts netxt time + * when device starts next time */ - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -1531,7 +1531,7 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu) /* stop the chip */ writel(RUN, lp->mmio + CMD0); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 6b73648b3779..c4a4e316683f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2070,7 +2070,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu) return ret; pdata->rx_buf_size = ret; - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); xgbe_restart_dev(pdata); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 44900026d11b..4af9d89d5f88 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1530,7 +1530,7 @@ static int xgene_change_mtu(struct net_device *ndev, int new_mtu) frame_size = (new_mtu > ETH_DATA_LEN) ? 
(new_mtu + 18) : 0x600; xgene_enet_close(ndev); - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); pdata->mac_ops->set_framesize(pdata, frame_size); xgene_enet_open(ndev); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 0b2a52199914..c1d1673c5749 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -146,7 +146,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) if (err < 0) goto err_exit; - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); err_exit: return err; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 0f2f400b5bc4..a38be924cdaa 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1788,7 +1788,7 @@ static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu) { struct ag71xx *ag = netdev_priv(ndev); - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); ag71xx_wr(ag, AG71XX_REG_MAC_MFL, ag71xx_max_frame_len(ndev->mtu)); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 49bb9a8f00e6..3d28654e5df7 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1176,7 +1176,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) struct alx_priv *alx = netdev_priv(netdev); int max_frame = ALX_MAX_FRAME_LEN(mtu); - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); netdev_update_features(netdev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 46cdc32b4e31..c571614b1d50 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -561,7 +561,7 @@ static int atl1c_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) { while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); adapter->hw.max_frame_size = new_mtu; atl1c_set_rxbufsize(adapter, netdev); atl1c_down(adapter); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 5f2a6fcba967..9b778b34b67e 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -428,7 +428,7 @@ static int atl1e_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) { while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); adapter->hw.max_frame_size = new_mtu; adapter->hw.rx_jumbo_th = (max_frame + 7) >> 3; atl1e_down(adapter); diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index a9014d7932db..3afd3627ce48 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2687,7 +2687,7 @@ static int atl1_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = (max_frame + 7) & ~7; adapter->hw.rx_jumbo_th = adapter->rx_buffer_len / 8; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) { atl1_down(adapter); atl1_up(adapter); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 
bcfc9488125b..fa9a4919f25d 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -905,7 +905,7 @@ static int atl2_change_mtu(struct net_device *netdev, int new_mtu) struct atl2_hw *hw = &adapter->hw; /* set MTU */ - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); hw->max_frame_size = new_mtu; ATL2_WRITE_REG(hw, REG_MTU, new_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 1be6d14030bc..e5809ad5eb82 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1042,13 +1042,13 @@ static int b44_change_mtu(struct net_device *dev, int new_mtu) /* We'll just catch it later when the * device is up'd. */ - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } spin_lock_irq(&bp->lock); b44_halt(bp); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); b44_init_rings(bp); b44_init_hw(bp, B44_FULL_RESET); spin_unlock_irq(&bp->lock); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 3196c4dea076..3c0e3b9828be 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1652,7 +1652,7 @@ static int bcm_enet_change_mtu(struct net_device *dev, int new_mtu) priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index b65b8592ad75..6ec773e61182 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -7912,7 +7912,7 @@ bnx2_change_mtu(struct net_device *dev, int new_mtu) { struct bnx2 *bp = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return bnx2_change_ring_size(bp, bp->rx_ring_size, bp->tx_ring_size, false); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index c9b6acd8c892..a8e07e51418f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4902,7 +4902,7 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu) * because the actual alloc size is * only updated as part of load */ - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (!bnx2x_mtu_allows_gro(new_mtu)) dev->features &= ~NETIF_F_GRO_HW; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0d1ed6e5dfbd..c437ca1c0fd3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14280,7 +14280,7 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) bnxt_close_nic(bp, true, false); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); bnxt_set_ring_params(bp); if (netif_running(dev)) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index e6ff3c9bd7e5..1589a49b876c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14295,7 +14295,7 @@ static void tg3_set_rx_mode(struct net_device *dev) static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp, int new_mtu) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (new_mtu > ETH_DATA_LEN) { if (tg3_flag(tp, 5780_CLASS)) { diff --git 
a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index c32174484a96..fe121d36112d 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3276,7 +3276,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu) mutex_lock(&bnad->conf_mutex); mtu = netdev->mtu; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); frame = BNAD_FRAME_SIZE(mtu); new_frame = BNAD_FRAME_SIZE(new_mtu); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 898debfd4db3..241ce9a2fa99 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3022,7 +3022,7 @@ static int macb_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) return -EBUSY; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 5e97f1e4e38e..a71b320fd030 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1358,7 +1358,7 @@ static int xgmac_change_mtu(struct net_device *dev, int new_mtu) /* Bring interface down, change mtu and bring interface back up */ xgmac_stop(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return xgmac_open(dev); } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index f38d31bfab1b..674c54831875 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -1262,7 +1262,7 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); lio->mtu = new_mtu; WRITE_ONCE(sc->caller_is_done, true); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index aa6c0dfb6f1c..96c6ea12279f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -218,7 +218,7 @@ lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu) return -EIO; } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 007d4b06819e..744f2434f7fa 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -649,7 +649,7 @@ static int octeon_mgmt_change_mtu(struct net_device *netdev, int new_mtu) struct octeon_mgmt *p = netdev_priv(netdev); int max_packet = new_mtu + ETH_HLEN + ETH_FCS_LEN; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); /* HW lifts the limit if the frame is VLAN tagged * (+4 bytes per each tag, up to two tags) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index eff350e0bc2a..aebb9fef3f6e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1589,7 +1589,7 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (!netif_running(netdev)) return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index d2286adf09fe..7d7d3e0098df 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ 
b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -844,7 +844,7 @@ static int t1_change_mtu(struct net_device *dev, int new_mtu) return -EOPNOTSUPP; if ((ret = mac->ops->set_mtu(mac, new_mtu))) return ret; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 2236f1d35f2b..f92a3550e480 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2559,7 +2559,7 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) if ((ret = t3_mac_set_mtu(&pi->mac, new_mtu))) return ret; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); init_port_mtus(adapter); if (adapter->params.rev == 0 && offload_running(adapter)) t3_load_mtus(adapter, adapter->params.mtus, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 2eb33a727bba..2418645c8823 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3180,7 +3180,7 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) ret = t4_set_rxmode(pi->adapter, pi->adapter->mbox, pi->viid, pi->viid_mirror, new_mtu, -1, -1, -1, -1, true); if (!ret) - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return ret; } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 9ba0864592e8..2fbe0f059a0b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1169,7 +1169,7 @@ static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu) ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu, -1, -1, -1, -1, true); if (!ret) - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return ret; } diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d266a87297a5..f604119efc80 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2027,7 +2027,7 @@ static int _enic_change_mtu(struct net_device *netdev, int new_mtu) return err; } - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (running) { err = enic_open(netdev); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 705c3eb19cd3..2f98f644b9d7 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1978,7 +1978,7 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu) gmac_disable_tx_rx(netdev); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); gmac_update_config0_reg(netdev, max_len << CONFIG0_MAXLEN_SHIFT, CONFIG0_MAXLEN_MASK); diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index aaf0eda96292..8af5ecec7d61 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -708,7 +708,7 @@ static int change_mtu(struct net_device *dev, int new_mtu) { if (netif_running(dev)) return -EBUSY; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 003bc9a45c65..1047c805054e 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1092,7 +1092,7 @@ static int ftmac100_change_mtu(struct net_device 
*netdev, int mtu) } iowrite32(maccr, priv->base + FTMAC100_OFFSET_MACCR); - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dcbc598b11c6..baa0b3c2ce6f 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2995,7 +2995,7 @@ static int dpaa_change_mtu(struct net_device *net_dev, int new_mtu) if (priv->xdp_prog && !xdp_validate_mtu(priv, new_mtu)) return -EINVAL; - net_dev->mtu = new_mtu; + WRITE_ONCE(net_dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 40e881829595..6866807973da 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2698,7 +2698,7 @@ static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu) return err; out: - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f3543a2df68d..a71f848adc05 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -590,7 +590,7 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu) return err; } - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); return 0; } diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a811238c018d..2baef59f741d 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2026,7 +2026,7 @@ static int gfar_change_mtu(struct net_device *dev, int new_mtu) if (dev->flags & IFF_UP) stop_gfar(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (dev->flags & IFF_UP) startup_gfar(dev); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c index df86770731ad..ac86179a0a81 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_main.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -927,7 +927,7 @@ static int fun_change_mtu(struct net_device *netdev, int new_mtu) rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu); if (!rc) - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return rc; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 8a713eed4465..fd32e15cadcb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1777,7 +1777,7 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu) } /* finally, set new mtu to netdevice */ - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); out: if (if_running) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 19668a8d22f7..dfdc0e032c07 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2761,7 +2761,7 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) netdev_err(netdev, "failed to change MTU in hardware %d\n", ret); else - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return ret; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 
499c657d37a9..890f213da8d1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -581,7 +581,7 @@ static int hinic_change_mtu(struct net_device *netdev, int new_mtu) if (err) netif_err(nic_dev, drv, netdev, "Failed to set port mtu\n"); else - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return err; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index e6e47b1842ea..a19d098f2e2b 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -1098,7 +1098,7 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) /* This is to prevent starting RX channel in emac_rx_enable() */ set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); - dev->ndev->mtu = new_mtu; + WRITE_ONCE(dev->ndev->mtu, new_mtu); emac_full_tx_reset(dev); } @@ -1130,7 +1130,7 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu) } if (!ret) { - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); dev->rx_skb_size = emac_rx_skb_size(new_mtu); dev->rx_sync_size = emac_rx_sync_size(new_mtu); } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index b5aef0b29efe..4c9d9badd698 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1537,7 +1537,7 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) adapter->rx_buff_pool[i].active = 1; if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); vio_cmo_set_dev_desired(viodev, ibmveth_get_desired_dma (viodev)); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 5b43f9b194fc..60fff9a6c53e 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3569,7 +3569,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) e1000_up(adapter); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index bef65ee4c549..220d62fca55d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6038,7 +6038,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) adapter->max_frame_size = max_frame; netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); pm_runtime_get_sync(netdev->dev.parent); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2cc7bec0557b..143e37ae88ef 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2961,7 +2961,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d4f4fd6a1001..d4699c4c535d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ 
-4296,7 +4296,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) { iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 06549dae4cca..125c53c51898 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7770,7 +7770,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return -EBUSY; } - netdev->mtu = (unsigned int)new_mtu; + WRITE_ONCE(netdev->mtu, (unsigned int)new_mtu); err = ice_down_up(vsi); if (err) return err; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5d3532c27d57..52ceda6306a3 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2234,7 +2234,7 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); err = idpf_initiate_soft_reset(vport, IDPF_SR_MTU_CHANGE); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 74a998fcaa6f..7d9389040e40 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6641,7 +6641,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) igb_up(adapter); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 40ccd24ffc53..7661edd7d0f2 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2434,7 +2434,7 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu) netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) igbvf_up(adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 34c257a51ed1..c30f0f572600 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5275,7 +5275,7 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) igc_down(adapter); netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) igc_up(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 43e7e75ae18c..094653e81b97 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6847,7 +6847,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu, new_mtu); /* must set new MTU before calling down or up */ - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) ixgbe_reinit_locked(adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3161a13079fe..b938dc06045d 100644 --- 
a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4292,7 +4292,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu, new_mtu); /* must set new MTU before calling down or up */ - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) ixgbevf_reinit_locked(adapter); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 1732ec3c3dbd..b06e24562973 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2301,7 +2301,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) { struct jme_adapter *jme = netdev_priv(netdev); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); netdev_update_features(netdev); jme_restart_rx_engine(jme); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 1d5b7bb6380f..5352fee62d2b 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -519,7 +519,7 @@ ltq_etop_change_mtu(struct net_device *dev, int new_mtu) struct ltq_etop_priv *priv = netdev_priv(dev); unsigned long flags; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); spin_lock_irqsave(&priv->lock, flags); ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu, LTQ_ETOP_IGPLEN); diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 8bd4def3622e..07904a528f21 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -419,7 +419,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) int curr_desc; int ret = 0; - net_dev->mtu = new_mtu; + WRITE_ONCE(net_dev->mtu, new_mtu); priv->rx_buf_size = xrx200_buffer_size(new_mtu); priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); @@ -440,7 +440,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) buff = ch_rx->rx_buff[ch_rx->dma.desc]; ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag); if (ret) { - net_dev->mtu = old_mtu; + WRITE_ONCE(net_dev->mtu, old_mtu); priv->rx_buf_size = xrx200_buffer_size(old_mtu); priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); break; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index f0bdc06d253d..f35ae2c88091 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2562,7 +2562,7 @@ static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu) { struct mv643xx_eth_private *mp = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); mv643xx_eth_recalc_skb_size(mp); tx_set_rate(mp, 1000000000, 16777216); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 26bf5d47ba02..41894834fb53 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3861,7 +3861,7 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) return -EINVAL; } - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); if (!netif_running(dev)) { if (pp->bm_priv) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 19253a0fb4fe..e91486c48de3 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1375,7 +1375,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) } out_set: - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); dev->wanted_features = dev->features; 
netdev_update_features(dev); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 7c9faa714a10..549436efc204 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1096,7 +1096,7 @@ static int octep_change_mtu(struct net_device *netdev, int new_mtu) true); if (!err) { oct->link_info.mtu = new_mtu; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); } return err; diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index dd49d0b8b494..7e6771c9cdbb 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -881,7 +881,7 @@ static int octep_vf_change_mtu(struct net_device *netdev, int new_mtu) err = octep_vf_mbox_set_mtu(oct, new_mtu); if (!err) { oct->link_info.mtu = new_mtu; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); } return err; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 14bccff0ee5c..f5bce3e326cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -67,7 +67,7 @@ static int otx2_change_mtu(struct net_device *netdev, int new_mtu) netdev_info(netdev, "Changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (if_up) err = otx2_open(netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index cf0aa16d7540..99fcc5661674 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -456,7 +456,7 @@ static int otx2vf_change_mtu(struct net_device *netdev, int new_mtu) netdev_info(netdev, "Changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (if_up) err = otx2vf_open(netdev); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index ba6d53ac7f55..63ae01954dfc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -489,7 +489,7 @@ static int prestera_port_change_mtu(struct net_device *dev, int mtu) if (err) return err; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); return 0; } diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index dd6ca2e4fd51..1a59c952aa01 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1188,7 +1188,7 @@ static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) { struct pxa168_eth_private *pep = netdev_priv(dev); - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); set_port_config_ext(pep); if (!netif_running(dev)) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 1b43704baceb..fcfb34561882 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2905,13 +2905,13 @@ static int skge_change_mtu(struct net_device *dev, int new_mtu) int err; if (!netif_running(dev)) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } skge_down(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); err = 
skge_up(dev); if (err) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index f3f7f4cc27b3..a7a16eac1891 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2384,7 +2384,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) u32 imask; if (!netif_running(dev)) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); netdev_update_features(dev); return 0; } @@ -2407,7 +2407,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) sky2_rx_stop(sky2); sky2_rx_clean(sky2); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); netdev_update_features(dev); mode = DATA_BLIND_VAL(DATA_BLIND_DEF) | GM_SMOD_VLAN_ENA; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d7a96dc11c07..179c0230655a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4055,7 +4055,7 @@ static int mtk_change_mtu(struct net_device *dev, int new_mtu) } mtk_set_mcr_max_rx(mac, length); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5d3fde63b273..4c089cfa027a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1649,7 +1649,7 @@ int mlx4_en_start_port(struct net_device *dev) sizeof(struct ethtool_flow_id) * MAX_NUM_OF_FS_RULES); /* Calculate Rx buf size */ - dev->mtu = min(dev->mtu, priv->max_mtu); + WRITE_ONCE(dev->mtu, min(dev->mtu, priv->max_mtu)); mlx4_en_calc_rx_buf(dev); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size); @@ -2394,7 +2394,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) !mlx4_en_check_xdp_mtu(dev, new_mtu)) return -EOPNOTSUPP; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (netif_running(dev)) { mutex_lock(&mdev->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3bd0695845c7..ffe8919494d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4525,7 +4525,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); out: - netdev->mtu = params->sw_mtu; + WRITE_ONCE(netdev->mtu, params->sw_mtu); mutex_unlock(&priv->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index d77be1b4dd9c..8e0404c0d1ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -531,7 +531,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) if (err) goto out; - netdev->mtu = new_params.sw_mtu; + WRITE_ONCE(netdev->mtu, new_params.sw_mtu); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index f87471306f6b..028a76944d82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -280,7 +280,7 @@ static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) struct mlx5e_priv *priv = mlx5i_epriv(netdev); mutex_lock(&priv->state_lock); - netdev->mtu = 
new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index bb642e9bb6cf..030ed71f945d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -825,7 +825,7 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); if (err) goto err_port_mtu_set; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); return 0; err_port_mtu_set: diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index c5aeeb964c17..dc1d9f774565 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -5427,7 +5427,7 @@ static int netdev_change_mtu(struct net_device *dev, int new_mtu) } hw_mtu = (hw_mtu + 3) & ~3; hw_priv->mtu = hw_mtu; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index cee47729d022..6be8a43c908a 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3184,7 +3184,7 @@ static int lan743x_netdev_change_mtu(struct net_device *netdev, int new_mtu) ret = lan743x_mac_set_mtu(adapter, new_mtu); if (!ret) - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return ret; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index b7e75da65834..b12d3b8a64fd 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -402,7 +402,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(LAN966X_HW_MTU(new_mtu)), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (!lan966x->fdma) return 0; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index f2fae659bf3b..d087cf954f75 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -690,12 +690,12 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) goto out; } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); err = mana_attach(ndev); if (err) { netdev_err(ndev, "mana_attach failed: %d\n", err); - ndev->mtu = old_mtu; + WRITE_ONCE(ndev->mtu, old_mtu); } out: diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 7b7e1c5b00f4..b7d9657a7af3 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3036,11 +3036,11 @@ static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) /* if we change the mtu on an active device, we must * reset the device so the firmware sees the change */ myri10ge_close(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); myri10ge_open(dev); - } else - dev->mtu = new_mtu; - + } else { + WRITE_ONCE(dev->mtu, new_mtu); + } return 0; } diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 650a5a166070..ad0c14849115 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -2526,7 +2526,7 @@ static void __set_rx_mode(struct 
net_device *dev) static int natsemi_change_mtu(struct net_device *dev, int new_mtu) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* synchronized against open : rtnl_lock() held by caller */ if (netif_running(dev)) { diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 55408f16fbbc..f235e76e4ce9 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -6637,7 +6637,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) struct s2io_nic *sp = netdev_priv(dev); int ret = 0; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (netif_running(dev)) { s2io_stop_all_tx_queue(sp); s2io_card_down(sp); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 997cc4fcffdb..182ba0a8b095 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1526,7 +1526,7 @@ static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp) *dp = nn->dp; nn->dp = new_dp; - nn->dp.netdev->mtu = new_dp.mtu; + WRITE_ONCE(nn->dp.netdev->mtu, new_dp.mtu); if (!netif_is_rxfh_configured(nn->dp.netdev)) nfp_net_rss_init_itbl(nn); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 3af1229a3f08..eee0bfc41074 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -177,7 +177,7 @@ static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu) if (err) return err; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index fa1f78b03cb2..2aa4ad9cf96e 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -946,7 +946,7 @@ static int nixge_change_mtu(struct net_device *ndev, int new_mtu) NIXGE_MAX_JUMBO_FRAME_SIZE) return -EINVAL; - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 31f896c4aa26..720f577929db 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3098,7 +3098,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) int old_mtu; old_mtu = dev->mtu; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* return early if the buffer sizes will not change */ if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 28b7cec485ef..4ac29cd59f2b 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2184,7 +2184,7 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu) } } else { pch_gbe_reset(adapter); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); adapter->hw.mac.max_frame_size = max_frame; } diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index ed7dd0a04235..62ba269da902 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1639,7 +1639,7 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu) reg |= PAS_MAC_CFG_MACCFG_MAXF(new_mtu + ETH_HLEN + 4); write_mac_reg(mac, PAS_MAC_CFG_MACCFG, 
reg); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */ mac->bufsz = new_mtu + ETH_HLEN + ETH_FCS_LEN + LOCAL_SKB_ALIGN + 128; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7f0c6cdc375e..24870da3f484 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1761,13 +1761,13 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu) /* if we're not running, nothing more to do */ if (!netif_running(netdev)) { - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } mutex_lock(&lif->queue_lock); ionic_stop_queues_reconfig(lif); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); err = ionic_start_queues_reconfig(lif); mutex_unlock(&lif->queue_lock); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c index 6e12cd21ac90..89c8b2349694 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c @@ -960,7 +960,7 @@ int netxen_nic_change_mtu(struct net_device *netdev, int mtu) rc = adapter->set_mtu(adapter, mtu); if (!rc) - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); return rc; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index ae3ebf0cf999..f497f6ca1018 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1026,7 +1026,7 @@ static int qede_get_regs_len(struct net_device *ndev) static void qede_update_mtu(struct qede_dev *edev, struct qede_reload_args *args) { - edev->ndev->mtu = args->u.mtu; + WRITE_ONCE(edev->ndev->mtu, args->u.mtu); } /* Netdevice NDOs */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 4b8bc46f55c2..ae4ee0326ee1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -1015,7 +1015,7 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu) rc = qlcnic_fw_cmd_set_mtu(adapter, mtu); if (!rc) - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); return rc; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 4c06f55878de..99d4647bf245 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -216,7 +216,7 @@ static int emac_change_mtu(struct net_device *netdev, int new_mtu) netif_dbg(adpt, hw, adpt->netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) return emac_reinit_locked(adpt); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 9d2a9562c96f..f1e40aade127 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -90,7 +90,7 @@ static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, int new_mtu) new_mtu > (priv->real_dev->mtu - headroom)) return -EINVAL; - rmnet_dev->mtu = new_mtu; + WRITE_ONCE(rmnet_dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index f5786d78ed23..5652da8a178c 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ 
b/drivers/net/ethernet/realtek/8139cp.c @@ -1277,14 +1277,14 @@ static int cp_change_mtu(struct net_device *dev, int new_mtu) /* if network interface not up, no need for complexity */ if (!netif_running(dev)) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); cp_set_rxbufsize(cp); /* set new rx buf size */ return 0; } /* network IS up, close it, reset MTU, and come up again. */ cp_close(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); cp_set_rxbufsize(cp); return cp_open(dev); } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fab21d2bc4ff..5abbea91bc07 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3924,7 +3924,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) { struct rtl8169_private *tp = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); netdev_update_features(dev); rtl_jumbo_config(tp); rtl_set_eee_txidle_timer(tp); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 384ddad65aaf..4d100283c30f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2423,7 +2423,7 @@ static int ravb_change_mtu(struct net_device *ndev, int new_mtu) { struct ravb_private *priv = netdev_priv(ndev); - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); if (netif_running(ndev)) { synchronize_irq(priv->emac_irq); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 0786eb0da391..7a25903e35c3 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2624,7 +2624,7 @@ static int sh_eth_change_mtu(struct net_device *ndev, int new_mtu) if (netif_running(ndev)) return -EBUSY; - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); netdev_update_features(ndev); return 0; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 755db89db909..e097ce3e69ea 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1967,7 +1967,7 @@ static int rocker_port_change_mtu(struct net_device *dev, int new_mtu) rocker_port_stop(dev); netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu); if (err) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index ecbe3994f2b1..12c8396b6942 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1804,7 +1804,7 @@ static int sxgbe_set_features(struct net_device *dev, */ static int sxgbe_change_mtu(struct net_device *dev, int new_mtu) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (!netif_running(dev)) return 0; diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 551f890db90a..4ebd5ae23eca 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -302,7 +302,7 @@ int efx_change_mtu(struct net_device *net_dev, int new_mtu) efx_stop_all(efx); mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; + WRITE_ONCE(net_dev->mtu, new_mtu); efx_mac_reconfigure(efx, true); mutex_unlock(&efx->mac_lock); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 
1cb32aedd89c..8925745f1c17 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2125,7 +2125,7 @@ static int ef4_change_mtu(struct net_device *net_dev, int new_mtu) ef4_stop_all(efx); mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; + WRITE_ONCE(net_dev->mtu, new_mtu); ef4_mac_reconfigure(efx); mutex_unlock(&efx->mac_lock); diff --git a/drivers/net/ethernet/sfc/siena/efx_common.c b/drivers/net/ethernet/sfc/siena/efx_common.c index 88e5bc347a44..cf195162e270 100644 --- a/drivers/net/ethernet/sfc/siena/efx_common.c +++ b/drivers/net/ethernet/sfc/siena/efx_common.c @@ -306,7 +306,7 @@ int efx_siena_change_mtu(struct net_device *net_dev, int new_mtu) efx_siena_stop_all(efx); mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; + WRITE_ONCE(net_dev->mtu, new_mtu); efx_siena_mac_reconfigure(efx, true); mutex_unlock(&efx->mac_lock); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59bf83904b62..3d828904db0d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5909,7 +5909,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) stmmac_set_rx_mode(dev); } - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); netdev_update_features(dev); return 0; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 8f1f43dbb76d..b8948d5b779a 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3804,7 +3804,7 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu) { struct cas *cp = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (!netif_running(dev) || !netif_device_present(dev)) return 0; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f68aa813d4fb..41a27ae58ced 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6751,7 +6751,7 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) orig_jumbo = (dev->mtu > ETH_DATA_LEN); new_jumbo = (new_mtu > ETH_DATA_LEN); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (!netif_running(dev) || (orig_jumbo == new_jumbo)) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 9bd1df8308d2..4aa356e2f299 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2499,7 +2499,7 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu) { struct gem *gp = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* We'll just catch it later when the device is up'd or resumed */ if (!netif_running(dev) || !netif_device_present(dev)) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 36b948820c1e..d1793b6154c7 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -823,7 +823,7 @@ static int xlgmac_change_mtu(struct net_device *netdev, int mtu) return ret; pdata->rx_buf_size = ret; - netdev->mtu = mtu; + WRITE_ONCE(netdev->mtu, mtu); xlgmac_restart_dev(pdata); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index ca409515ead5..ede5f7890fb4 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -756,7 +756,7 @@ static int bdx_change_mtu(struct net_device *ndev, int new_mtu) { ENTER; - ndev->mtu = 
new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); if (netif_running(ndev)) { bdx_close(ndev); bdx_open(ndev); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 1c6b2a9bba08..55fff4d0d380 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2294,7 +2294,7 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) int ret = 0; if (!netif_running(dev)) { - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); goto out_0; } @@ -2336,7 +2336,7 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) tmp_vptr->rx = rx; tmp_vptr->tx = tx; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); velocity_init_registers(vptr, VELOCITY_INIT_COLD); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 945c13d1a982..3662483bfe2e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1408,7 +1408,7 @@ int wx_change_mtu(struct net_device *netdev, int new_mtu) { struct wx *wx = netdev_priv(netdev); - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); wx_set_rx_buffer_len(wx); return 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index aaf780fd4f5e..c29809cd9201 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1641,7 +1641,7 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu) XAE_TRL_SIZE) > lp->rxmem) return -EINVAL; - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index e0d26148dfd9..8aff6a73ca0a 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1233,7 +1233,7 @@ static int ixp4xx_eth_change_mtu(struct net_device *dev, int new_mtu) return ret; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 5fbe33a09bb0..6411da359e5a 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -811,7 +811,7 @@ static int fjes_change_mtu(struct net_device *netdev, int new_mtu) netif_tx_stop_all_queues(netdev); } - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); if (running) { for (epidx = 0; epidx < hw->max_epid; epidx++) { diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f918ca6146c8..51495cb4b9be 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1059,7 +1059,7 @@ static int geneve_change_mtu(struct net_device *dev, int new_mtu) else if (new_mtu < dev->min_mtu) new_mtu = dev->min_mtu; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 11831a1c9762..44142245343d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1233,14 +1233,14 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto rollback_vf; - ndev->mtu = mtu; + WRITE_ONCE(ndev->mtu, mtu); ret = netvsc_attach(ndev, device_info); if (!ret) goto out; /* Attempt rollback to original MTU */ - ndev->mtu = orig_mtu; + WRITE_ONCE(ndev->mtu, orig_mtu); if (netvsc_attach(ndev, device_info)) netdev_err(ndev, "restoring mtu failed\n"); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 
ff016c11b4a0..2da70bc3dd86 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3753,7 +3753,7 @@ static int macsec_change_mtu(struct net_device *dev, int new_mtu) if (macsec->real_dev->mtu - extra < new_mtu) return -ERANGE; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0cec2783a3e7..67b7ef2d463f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -865,7 +865,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) if (vlan->lowerdev->mtu < new_mtu) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index d0c916a53d7c..963d8b4af28d 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -231,7 +231,7 @@ static int net_failover_change_mtu(struct net_device *dev, int new_mtu) } } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index d127856f8f36..e3a385a32269 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -74,7 +74,7 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu) if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU) return -EBUSY; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 536bd6564f8b..ffe7f463e16e 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -306,7 +306,7 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) return -EINVAL; if (!netif_running(ndev)) { - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); return 0; } @@ -335,7 +335,7 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) } } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); ntb_transport_link_up(dev->qp); diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 0aba3569ccc0..fb362ee248ff 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -286,7 +286,7 @@ static int sl_realloc_bufs(struct slip *sl, int mtu) } } sl->mtu = mtu; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); sl->buffsize = len; err = 0; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 8c7dbaf7c22e..ab1935a4aa2c 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1831,7 +1831,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) team->port_mtu_change_allowed = false; mutex_unlock(&team->lock); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 7b8afa589a53..74571c8e7691 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -424,7 +424,7 @@ static int aqc111_change_mtu(struct net_device *net, int new_mtu) u16 reg16 = 0; u8 buf[5]; - net->mtu = new_mtu; + WRITE_ONCE(net->mtu, new_mtu); dev->hard_mtu = net->mtu + net->hard_header_len; aqc111_read16_cmd(dev, AQ_ACCESS_MAC, SFR_MEDIUM_STATUS_MODE, diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index f7cff58fe044..57d6e5abc30e 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1230,7 +1230,7 @@ static int ax88178_change_mtu(struct net_device *net, int new_mtu) if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; - net->mtu = new_mtu; + WRITE_ONCE(net->mtu, new_mtu); dev->hard_mtu = net->mtu + 
net->hard_header_len; ax88178_set_mfb(dev); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index b25535aa15ae..c5196b5313c0 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -943,7 +943,7 @@ static int ax88179_change_mtu(struct net_device *net, int new_mtu) struct usbnet *dev = netdev_priv(net); u16 tmp16; - net->mtu = new_mtu; + WRITE_ONCE(net->mtu, new_mtu); dev->hard_mtu = net->mtu + net->hard_header_len; if (net->mtu > 1500) { diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index db05622f1f70..bf76ecccc2e6 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -798,7 +798,7 @@ int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) { struct usbnet *dev = netdev_priv(net); - net->mtu = new_mtu; + WRITE_ONCE(net->mtu, new_mtu); cdc_ncm_set_dgram_size(dev, new_mtu + cdc_ncm_eth_hlen(dev)); return 0; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0030be502daa..5a2c38b63012 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2528,7 +2528,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) ret = lan78xx_set_rx_max_frame_length(dev, max_frame_len); if (!ret) - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); usb_autopm_put_interface(dev->intf); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 51e9f5b2dccf..19df1cd9f072 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9365,7 +9365,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) case RTL_VER_01: case RTL_VER_02: case RTL_VER_07: - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; default: break; @@ -9377,7 +9377,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) mutex_lock(&tp->control); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); if (netif_running(dev)) { if (tp->rtl_ops.change_mtu) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f3f7f686fe9c..9fd516e8bb10 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -398,7 +398,7 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu) // no second zero-length packet read wanted after mtu-sized packets if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; - net->mtu = new_mtu; + WRITE_ONCE(net->mtu, new_mtu); dev->hard_mtu = net->mtu + net->hard_header_len; if (dev->rx_urb_size == old_hard_mtu) { diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 0578864792b6..89ca6e75fcc6 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3457,7 +3457,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - netdev->mtu = new_mtu; + WRITE_ONCE(netdev->mtu, new_mtu); /* * Reset_work may be in the middle of resetting the device, wait for its diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c index a1ba5169ed5d..4c260074c091 100644 --- a/drivers/net/vsockmon.c +++ b/drivers/net/vsockmon.c @@ -58,7 +58,7 @@ static int vsockmon_change_mtu(struct net_device *dev, int new_mtu) if (!vsockmon_is_valid_mtu(new_mtu)) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 7e3a7d1f2018..f78dd0438843 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3181,7 +3181,7 @@ static int 
vxlan_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 65db5f14465f..325fcb3d1075 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -358,7 +358,7 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu) if (mtu > max) return -EINVAL; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); return 0; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8d2aee88526c..4265c1cd0ff7 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1376,7 +1376,7 @@ static int xennet_change_mtu(struct net_device *dev, int mtu) if (mtu > max) return -EINVAL; - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); return 0; } diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 878fe3ce53ad..b93c2eb45916 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -996,7 +996,7 @@ static int ctcm_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; dev->hard_header_len = LL_HEADER_LENGTH + 2; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 39876eff51d2..3efba4f857ac 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -149,7 +149,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) if (max_mtu < new_mtu) return -ERANGE; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 89c51b3cf430..30ecbc2ef1fd 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -159,7 +159,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); bat_priv->mtu_set_by_user = new_mtu; return 0; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ab4d33e02014..71b50001ca58 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -197,7 +197,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* this flag will be cleared if the MTU was automatically adjusted */ br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true); diff --git a/net/dsa/user.c b/net/dsa/user.c index c94b868855aa..1f7b8f21db9e 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2120,7 +2120,7 @@ int dsa_user_change_mtu(struct net_device *dev, int new_mtu) if (err) goto out_port_failed; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); dsa_bridge_mtu_normalization(dp); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 86127300b102..26d5282a6496 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -123,7 +123,7 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 9756e657bab9..d7ae32473c41 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -96,7 +96,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, break; /* Handled in ndo_change_mtu() */ mtu_max = hsr_get_max_mtu(port->hsr); master = hsr_port_get_hsr(port->hsr, 
HSR_PT_MASTER); - master->dev->mtu = mtu_max; + WRITE_ONCE(master->dev->mtu, mtu_max); break; case NETDEV_UNREGISTER: if (!is_hsr_master(dev)) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c3af965dc407..ba205473522e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -793,7 +793,7 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) dev->needed_headroom += len; if (set_mtu) - dev->mtu = max_t(int, dev->mtu - len, 68); + WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68)); if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) || (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f1c5f6c3f2f8..bccef2fcf620 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -897,7 +897,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->fwmark = fwmark; mtu = ip_tunnel_bind_dev(dev); if (set_mtu) - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); } dst_cache_reset(&t->dst_cache); netdev_state_change(dev); @@ -1082,7 +1082,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) new_mtu = max_mtu; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5aec79c2af1a..9dee0c127955 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1746,7 +1746,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu > IP_MAX_MTU - dev->hard_header_len) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(ip6_tnl_change_mtu); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 78344cf3867e..590737c27537 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -666,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) dev->flags &= ~IFF_POINTOPOINT; if (keep_mtu && dev->mtu) { - dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu); + WRITE_ONCE(dev->mtu, + clamp(dev->mtu, dev->min_mtu, dev->max_mtu)); return; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 29850d0f0733..8badec6d82a2 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) } while ((q = NEXT_SLAVE(q)) != m->slaves); } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } -- cgit v1.2.3 From abb45a2477f533cd4aab3085defdff131e2e8c4f Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 6 May 2024 14:32:46 +0200 Subject: net: stmmac: dwmac-ipq806x: account for rgmii-txid/rxid/id phy-mode Currently the ipq806x dwmac driver is almost always used attached to the CPU port of a switch and phy-mode was always set to "rgmii" or "sgmii". Some device came up with a special configuration where the PHY is directly attached to the GMAC port and in those case phy-mode needs to be set to "rgmii-id" to make the PHY correctly work and receive packets. Since the driver supports only "rgmii" and "sgmii" mode, when "rgmii-id" (or variants) mode is set, the mode is rejected and probe fails. Add support also for these phy-modes to correctly setup PHYs that requires delay applied to tx/rx. Signed-off-by: Christian Marangi Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 281687d7083b..4ba15873d5b1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -171,6 +171,9 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed) switch (gmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: div = get_clk_div_rgmii(gmac, speed); clk_bits = NSS_COMMON_CLK_GATE_RGMII_RX_EN(gmac->id) | NSS_COMMON_CLK_GATE_RGMII_TX_EN(gmac->id); @@ -410,6 +413,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val |= NSS_COMMON_GMAC_CTL_CSYS_REQ; switch (gmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: val |= NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL; break; case PHY_INTERFACE_MODE_SGMII: @@ -425,6 +431,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val &= ~(1 << NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id)); switch (gmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: val |= NSS_COMMON_CLK_SRC_CTRL_RGMII(gmac->id) << NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); break; @@ -442,6 +451,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val |= NSS_COMMON_CLK_GATE_PTP_EN(gmac->id); switch (gmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: val |= NSS_COMMON_CLK_GATE_RGMII_RX_EN(gmac->id) | NSS_COMMON_CLK_GATE_RGMII_TX_EN(gmac->id); break; -- cgit v1.2.3 From 174ee5bcfeb7a194b107a8e1a6fa9a1b98de399a Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Tue, 16 Apr 2024 14:43:19 +0000 Subject: i40e: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. 
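As a rough sketch of the pattern used across this series (the flow_offload.h helpers are the real ones; foo_parse_cls_flower() and its flow_cls_offload argument are only illustrative):

#include <linux/errno.h>
#include <net/flow_offload.h>

static int foo_parse_cls_flower(struct flow_cls_offload *f)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);

		/* Any masked control flag (e.g. set via "ip_flags frag")
		 * is unsupported here; the helper also fills in extack.
		 */
		if (flow_rule_has_control_flags(match.mask->flags,
						f->common.extack))
			return -EOPNOTSUPP;
	}

	return 0;
}

The igb patch later in this series uses the rule-level wrapper flow_rule_match_has_control_flags(rule, extack), which combines the match and the flag check in one call.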
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 143e37ae88ef..1f188c052828 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8671,6 +8671,10 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { -- cgit v1.2.3 From c7b9c49442469b536829e4d1238ce8cf352bd149 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Tue, 16 Apr 2024 14:43:25 +0000 Subject: iavf: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d4699c4c535d..c6dff0963053 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3757,6 +3757,10 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + f->common.extack)) + return -EOPNOTSUPP; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { -- cgit v1.2.3 From 21e1fe9e84f4f2fa672add77a9f9dae7e9d18b6e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Tue, 16 Apr 2024 14:43:30 +0000 Subject: ice: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 8553c56dc95d..8bd24b33f3a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -1658,6 +1658,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + fltr->extack)) + return -EOPNOTSUPP; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { -- cgit v1.2.3 From fb324f2b22a6aee38d649aa3dad80cc77cfc7070 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Tue, 16 Apr 2024 14:43:35 +0000 Subject: igb: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7d9389040e40..fce2930ae6af 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2597,6 +2597,9 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; -- cgit v1.2.3 From 8e3a90f2e3aa642d0e7e8259f069ca539cecf549 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 23 Apr 2024 16:36:32 +0200 Subject: ice: remove correct filters during eswitch release ice_clear_dflt_vsi() is only removing default rule. Both default RX and TX rule should be removed during release. If it isn't switching to switchdev, second time results in error, because TX filter is already there. Fix it by removing the correct set of rules. 
Fixes: 50d62022f455 ("ice: default Tx rule instead of to queue") Reviewed-by: Wojciech Drewek Signed-off-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index c902848cf88e..b102db8b829a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -246,7 +246,10 @@ static void ice_eswitch_release_env(struct ice_pf *pf) ice_vsi_update_local_lb(uplink_vsi, false); ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); - ice_clear_dflt_vsi(uplink_vsi); + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_TX); + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_RX); ice_fltr_add_mac_and_broadcast(uplink_vsi, uplink_vsi->port_info->mac.perm_addr, ICE_FWD_TO_VSI); -- cgit v1.2.3 From 86167183a17e03ec77198897975e9fdfbd53cb0b Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 23 Apr 2024 12:24:54 +0200 Subject: igc: fix a log entry using uninitialized netdev During successful probe, igc logs this: [ 5.133667] igc 0000:01:00.0 (unnamed net_device) (uninitialized): PHC added ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The reason is that igc_ptp_init() is called very early, even before register_netdev() has been called. So the netdev_info() call works on a partially uninitialized netdev. Fix this by calling igc_ptp_init() after register_netdev(), right after the media autosense check, just as in igb. Add a comment, just as in igb. Now the log message is fine: [ 5.200987] igc 0000:01:00.0 eth0: PHC added Signed-off-by: Corinna Vinschen Reviewed-by: Hariprasad Kelam Acked-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index c30f0f572600..b5bcabab7a1d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7028,8 +7028,6 @@ static int igc_probe(struct pci_dev *pdev, device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGC_FLAG_WOL_SUPPORTED); - igc_ptp_init(adapter); - igc_tsn_clear_schedule(adapter); /* reset the hardware with the new settings */ @@ -7051,6 +7049,9 @@ static int igc_probe(struct pci_dev *pdev, /* Check if Media Autosense is enabled */ adapter->ei = *ei; + /* do hw tstamp init after resetting */ + igc_ptp_init(adapter); + /* print pcie link status and MAC address */ pcie_print_link_status(pdev); netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); -- cgit v1.2.3 From 6918107e25407ed0b5846af70449547263dcfef0 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 23 Apr 2024 17:40:59 +0300 Subject: net: e1000e & ixgbe: Remove PCI_HEADER_TYPE_MFD duplicates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI_HEADER_TYPE_MULTIFUNC is define by e1000e and ixgbe and both are unused. There is already PCI_HEADER_TYPE_MFD in pci_regs.h anyway which should be used instead so remove the duplicated defines of it. 
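For reference, a minimal sketch of using the generic define (foo_is_multi_function() is a hypothetical helper, not code from either driver):

#include <linux/pci.h>

static bool foo_is_multi_function(struct pci_dev *pdev)
{
	u8 hdr_type;

	/* PCI_HEADER_TYPE is the config-space offset (0x0E); the MFD bit
	 * now comes from uapi pci_regs.h rather than a per-driver 0x80.
	 */
	pci_read_config_byte(pdev, PCI_HEADER_TYPE, &hdr_type);

	return hdr_type & PCI_HEADER_TYPE_MFD;
}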
Signed-off-by: Ilpo Järvinen Reviewed-by: Hariprasad Kelam Reviewed-by: Jesse Brandeburg Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/defines.h | 2 -- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 - 2 files changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 23a58cada43a..5e2cfa73f889 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -679,8 +679,6 @@ /* PCI/PCI-X/PCI-EX Config space */ #define PCI_HEADER_TYPE_REGISTER 0x0E -#define PCI_HEADER_TYPE_MULTIFUNC 0x80 - #define PHY_REVISION_MASK 0xFFFFFFF0 #define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ #define MAX_PHY_MULTI_PAGE_REG 0xF diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index ed440dd0c4f9..897fe357b65b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2179,7 +2179,6 @@ enum { #define IXGBE_PCI_LINK_SPEED_5000 0x2 #define IXGBE_PCI_LINK_SPEED_8000 0x3 #define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E -#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 #define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 #define IXGBE_PCIDEVCTRL2_TIMEO_MASK 0xf -- cgit v1.2.3 From 8d8b1a422c4644891e1f8a3ea10b544b65cd0cc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 May 2024 18:41:44 +0000 Subject: net: annotate data-races around dev->if_port Various ndo_set_config() methods can change dev->if_port dev->if_port is going to be read locklessly from rtnl_fill_link_ifmap(). Add corresponding WRITE_ONCE() on writer sides. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240507184144.1230469-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c589_cs.c | 2 +- drivers/net/ethernet/8390/etherh.c | 2 +- drivers/net/ethernet/8390/pcnet_cs.c | 2 +- drivers/net/ethernet/amd/nmclan_cs.c | 2 +- drivers/net/ethernet/sis/sis900.c | 6 +++--- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- drivers/net/ethernet/xircom/xirc2ps_cs.c | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 5267e9dcd87e..be58dac0502a 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -502,7 +502,7 @@ static int el3_config(struct net_device *dev, struct ifmap *map) { if ((map->port != (u_char)(-1)) && (map->port != dev->if_port)) { if (map->port <= 3) { - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); netdev_info(dev, "switched to %s port\n", if_names[dev->if_port]); tc589_set_xcvr(dev, dev->if_port); } else { diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c index 05d39ecb97ff..e876fe52399a 100644 --- a/drivers/net/ethernet/8390/etherh.c +++ b/drivers/net/ethernet/8390/etherh.c @@ -258,7 +258,7 @@ static int etherh_set_config(struct net_device *dev, struct ifmap *map) * media type, turn off automedia detection. 
*/ dev->flags &= ~IFF_AUTOMEDIA; - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); break; default: diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 9bd5e991f1e5..780fb4afb6af 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -994,7 +994,7 @@ static int set_config(struct net_device *dev, struct ifmap *map) return -EOPNOTSUPP; else if ((map->port < 1) || (map->port > 2)) return -EINVAL; - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); netdev_info(dev, "switched to %s port\n", if_names[dev->if_port]); NS8390_init(dev, 1); } diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 0dd391c84c13..37054a670407 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -760,7 +760,7 @@ static int mace_config(struct net_device *dev, struct ifmap *map) { if ((map->port != (u_char)(-1)) && (map->port != dev->if_port)) { if (map->port <= 2) { - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); netdev_info(dev, "switched to %s port\n", if_names[dev->if_port]); } else return -EINVAL; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index cb7fec226cab..85b850372efe 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -2273,7 +2273,7 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map) * (which seems to be different from the ifport(pcmcia) definition) */ switch(map->port){ case IF_PORT_UNKNOWN: /* use auto here */ - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); /* we are going to change the media type, so the Link * will be temporary down and we need to reflect that * here. 
When the Link comes up again, it will be @@ -2294,7 +2294,7 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map) break; case IF_PORT_10BASET: /* 10BaseT */ - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); /* we are going to change the media type, so the Link * will be temporary down and we need to reflect that @@ -2315,7 +2315,7 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map) case IF_PORT_100BASET: /* 100BaseT */ case IF_PORT_100BASETX: /* 100BaseTx */ - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); /* we are going to change the media type, so the Link * will be temporary down and we need to reflect that diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 29bb19f42de9..86e3ec25df07 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -1595,7 +1595,7 @@ static int s9k_config(struct net_device *dev, struct ifmap *map) return -EOPNOTSUPP; else if (map->port > 2) return -EINVAL; - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); netdev_info(dev, "switched to %s port\n", if_names[dev->if_port]); smc_reset(dev); } diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index e9bc38fd2025..a31d5d5e6593 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1366,10 +1366,10 @@ do_config(struct net_device *dev, struct ifmap *map) return -EINVAL; if (!map->port) { local->probe_port = 1; - dev->if_port = 1; + WRITE_ONCE(dev->if_port, 1); } else { local->probe_port = 0; - dev->if_port = map->port; + WRITE_ONCE(dev->if_port, map->port); } netdev_info(dev, "switching to %s port\n", if_names[dev->if_port]); do_reset(dev,1); /* not the fine way :-) */ -- cgit v1.2.3 From 38155539a16ebb79dbb7a2e058138d70be68d245 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 9 May 2024 15:18:33 +0000 Subject: bnxt_en: silence clang build warning Clang build brings a warning: ../drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c:133:12: warning: comparison of distinct pointer types ('typeof (tmo_us) *' (aka 'unsigned int *') and 'typeof (65535) *' (aka 'int *')) [-Wcompare-distinct-pointer-types] 133 | tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it by specifying proper type for BNXT_PTP_QTS_MAX_TMO_US. 
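A stripped-down illustration of the warning and the fix (the names here are made up; only the constant's type matters):

#include <linux/types.h>
#include <linux/minmax.h>

#define FOO_MAX_TMO_US	65535U	/* plain 65535 is a signed int and trips the type check */

static u32 foo_clamp_tmo(u32 tmo_us)
{
	/* The kernel's min() compares typeof() pointers of both operands,
	 * so mixing a u32 with a signed int constant makes clang emit
	 * -Wcompare-distinct-pointer-types; the U suffix keeps both sides
	 * unsigned int.
	 */
	return min(tmo_us, FOO_MAX_TMO_US);
}

An alternative would be min_t(u32, tmo_us, 65535), but typing the define itself keeps it usable in other unsigned contexts.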
Fixes: 7de3c2218eed ("bnxt_en: Add a timeout parameter to bnxt_hwrm_port_ts_query()") Signed-off-by: Vadim Fedorenko Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240509151833.12579-1-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 6a2bba3f9e2d..2c3415c8fc03 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -24,7 +24,7 @@ #define BNXT_PTP_DFLT_TX_TMO 1000 /* ms */ #define BNXT_PTP_QTS_TIMEOUT 1000 -#define BNXT_PTP_QTS_MAX_TMO_US 65535 +#define BNXT_PTP_QTS_MAX_TMO_US 65535U #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \ PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET) -- cgit v1.2.3 From ebb8308eac84787c891483af50091a295cdd54ea Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 8 May 2024 09:32:19 +0100 Subject: gve: Avoid unnecessary use of comma operator Although it does not seem to have any untoward side-effects, the use of ';' to separate to assignments seems more appropriate than ','. Flagged by clang-18 -Wcomma No functional change intended. Compile tested only. Reviewed-by: Shailend Chand Reviewed-by: Larysa Zaremba Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240508-gve-comma-v2-1-1ac919225f13@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 3df8243680d9..8ca0def176ef 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -650,9 +650,9 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id; cmd->create_rx_queue.rx_desc_ring_addr = - cpu_to_be64(rx->desc.bus), + cpu_to_be64(rx->desc.bus); cmd->create_rx_queue.rx_data_ring_addr = - cpu_to_be64(rx->data.data_bus), + cpu_to_be64(rx->data.data_bus); cmd->create_rx_queue.index = cpu_to_be32(queue_index); cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); -- cgit v1.2.3 From ba8bcb012b7d6efbac30038d82bd67b70e4597d7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 8 May 2024 09:32:20 +0100 Subject: gve: Use ethtool_sprintf/puts() to fill stats strings Make use of standard helpers to simplify filling in stats strings. The first two ethtool_puts() changes address the following fortification warnings flagged by W=1 builds with clang-18. (The last ethtool_puts change does not because the warning relates to writing beyond the first element of an array, and gve_gstrings_priv_flags only has one element.) .../fortify-string.h:562:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 562 | __read_overflow2_field(q_size_field, size); | ^ .../fortify-string.h:562:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? 
[-Wattribute-warning] Likewise, the same changes resolve the same problems flagged by Smatch. .../gve_ethtool.c:100 gve_get_strings() error: __builtin_memcpy() '*gve_gstrings_main_stats' too small (32 vs 576) .../gve_ethtool.c:120 gve_get_strings() error: __builtin_memcpy() '*gve_gstrings_adminq_stats' too small (32 vs 512) Compile tested only. Reviewed-by: Shailend Chand Reviewed-by: Larysa Zaremba Signed-off-by: Simon Horman Acked-by: Justin Stitt Link: https://lore.kernel.org/r/20240508-gve-comma-v2-2-1ac919225f13@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 42 +++++++++++---------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 156b7e128b53..fe1741d482b4 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -90,42 +90,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct gve_priv *priv = netdev_priv(netdev); - char *s = (char *)data; + u8 *s = (char *)data; int num_tx_queues; int i, j; num_tx_queues = gve_num_tx_queues(priv); switch (stringset) { case ETH_SS_STATS: - memcpy(s, *gve_gstrings_main_stats, - sizeof(gve_gstrings_main_stats)); - s += sizeof(gve_gstrings_main_stats); - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - for (j = 0; j < NUM_GVE_RX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_rx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++) + ethtool_puts(&s, gve_gstrings_main_stats[i]); - for (i = 0; i < num_tx_queues; i++) { - for (j = 0; j < NUM_GVE_TX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_tx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + for (j = 0; j < NUM_GVE_RX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_rx_stats[j], + i); + + for (i = 0; i < num_tx_queues; i++) + for (j = 0; j < NUM_GVE_TX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_tx_stats[j], + i); + + for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) + ethtool_puts(&s, gve_gstrings_adminq_stats[i]); - memcpy(s, *gve_gstrings_adminq_stats, - sizeof(gve_gstrings_adminq_stats)); - s += sizeof(gve_gstrings_adminq_stats); break; case ETH_SS_PRIV_FLAGS: - memcpy(s, *gve_gstrings_priv_flags, - sizeof(gve_gstrings_priv_flags)); - s += sizeof(gve_gstrings_priv_flags); + for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++) + ethtool_puts(&s, gve_gstrings_priv_flags[i]); break; default: -- cgit v1.2.3 From 04fb71cc5f1866f391a9c21ea634217e065ae525 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 8 May 2024 12:39:35 +0530 Subject: octeontx2-pf: Reuse Transmit queue/Send queue index of HTB class Real number of Transmit queues are incremented when user enables HTB class and vice versa. Depending on SKB priority driver returns transmit queue (Txq). Transmit queues and Send queues are one-to-one mapped. In few scenarios, Driver is returning transmit queue value which is greater than real number of transmit queue and Stack detects this as error and overwrites transmit queue value. 
For example user has added two classes and real number of queues are incremented accordingly - tc class add dev eth1 parent 1: classid 1:1 htb rate 100Mbit ceil 100Mbit prio 1 quantum 1024 - tc class add dev eth1 parent 1: classid 1:2 htb rate 100Mbit ceil 200Mbit prio 7 quantum 1024 now if user deletes the class with id 1:1, driver decrements the real number of queues - tc class del dev eth1 classid 1:1 But for the class with id 1:2, driver is returning transmit queue value which is higher than real number of transmit queue leading to below error eth1 selects TX queue x, but real number of TX queues is x This patch solves the problem by assigning deleted class transmit queue/send queue to active class. Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508070935.11501-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 80 +++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 1723e9912ae0..070711df612e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -545,6 +545,20 @@ otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf, return node; } +static struct otx2_qos_node +*otx2_sw_node_find_by_qid(struct otx2_nic *pfvf, u16 qid) +{ + struct otx2_qos_node *node = NULL; + int bkt; + + hash_for_each(pfvf->qos.qos_hlist, bkt, node, hlist) { + if (node->qid == qid) + break; + } + + return node; +} + static struct otx2_qos_node * otx2_sw_node_find(struct otx2_nic *pfvf, u32 classid) { @@ -917,6 +931,7 @@ static void otx2_qos_enadis_sq(struct otx2_nic *pfvf, otx2_qos_disable_sq(pfvf, qid); pfvf->qos.qid_to_sqmap[qid] = node->schq; + otx2_qos_txschq_config(pfvf, node); otx2_qos_enable_sq(pfvf, qid); } @@ -1475,13 +1490,45 @@ out: return ret; } +static int otx2_qos_cur_leaf_nodes(struct otx2_nic *pfvf) +{ + int last = find_last_bit(pfvf->qos.qos_sq_bmap, pfvf->hw.tc_tx_queues); + + return last == pfvf->hw.tc_tx_queues ? 
0 : last + 1; +} + +static void otx2_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +static void otx2_cfg_smq(struct otx2_nic *pfvf, struct otx2_qos_node *node, + int qid) +{ + struct otx2_qos_node *tmp; + + list_for_each_entry(tmp, &node->child_schq_list, list) + if (tmp->level == NIX_TXSCH_LVL_MDQ) { + otx2_qos_txschq_config(pfvf, tmp); + pfvf->qos.qid_to_sqmap[qid] = tmp->schq; + } +} + static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid, struct netlink_ext_ack *extack) { struct otx2_qos_node *node, *parent; int dwrr_del_node = false; + u16 qid, moved_qid; u64 prio; - u16 qid; netdev_dbg(pfvf->netdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); @@ -1517,6 +1564,37 @@ static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid, if (!parent->child_static_cnt) parent->max_static_prio = 0; + moved_qid = otx2_qos_cur_leaf_nodes(pfvf); + + /* last node just deleted */ + if (moved_qid == 0 || moved_qid == qid) + return 0; + + moved_qid--; + + node = otx2_sw_node_find_by_qid(pfvf, moved_qid); + if (!node) + return 0; + + /* stop traffic to the old queue and disable + * SQ associated with it + */ + node->qid = OTX2_QOS_QID_INNER; + __clear_bit(moved_qid, pfvf->qos.qos_sq_bmap); + otx2_qos_disable_sq(pfvf, moved_qid); + + otx2_reset_qdisc(pfvf->netdev, pfvf->hw.tx_queues + moved_qid); + + /* enable SQ associated with qid and + * update the node + */ + otx2_cfg_smq(pfvf, node, qid); + + otx2_qos_enable_sq(pfvf, qid); + __set_bit(qid, pfvf->qos.qos_sq_bmap); + node->qid = qid; + + *classid = node->classid; return 0; } -- cgit v1.2.3 From 84c8b7ad5e748c0b93415b060c7071f8c524f4f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 14:45:19 +0300 Subject: net: ethernet: adi: adin1110: Replace linux/gpio.h by proper one linux/gpio.h is deprecated and subject to remove. The driver doesn't use it directly, replace it with what is really being used. Signed-off-by: Andy Shevchenko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508114519.972082-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/adi/adin1110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 8b4ef5121308..0713f1e2c7f3 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include -- cgit v1.2.3 From a7c9540e967b6f114c5d491b6a4d6f76a2ec8736 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:49 +0000 Subject: net: qede: use extack in qede_flow_parse_ports() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_ports to use extack, and drop the edev argument. Convert DP_NOTICE call to use NL_SET_ERR_MSG_MOD instead. In calls to qede_flow_parse_ports(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
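A condensed sketch of the shape this series gives the validation helpers (foo_check_ports() is illustrative, not the driver function):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/netlink.h>

static int foo_check_ports(__be16 src, __be16 src_mask,
			   struct netlink_ext_ack *extack)
{
	/* Report through extack so the message reaches the tc caller,
	 * instead of logging it with a device-private DP_NOTICE/DP_INFO.
	 */
	if (src && src_mask != htons(U16_MAX)) {
		NL_SET_ERR_MSG_MOD(extack, "Do not support ports masks");
		return -EINVAL;
	}

	return 0;
}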
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index ded48523c383..600b7dc7ad56 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1700,8 +1700,8 @@ static int qede_parse_actions(struct qede_dev *edev, } static int -qede_flow_parse_ports(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *t) +qede_flow_parse_ports(struct flow_rule *rule, struct qede_arfs_tuple *t, + struct netlink_ext_ack *extack) { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; @@ -1709,7 +1709,8 @@ qede_flow_parse_ports(struct qede_dev *edev, struct flow_rule *rule, flow_rule_match_ports(rule, &match); if ((match.key->src && match.mask->src != htons(U16_MAX)) || (match.key->dst && match.mask->dst != htons(U16_MAX))) { - DP_NOTICE(edev, "Do not support ports masks\n"); + NL_SET_ERR_MSG_MOD(extack, + "Do not support ports masks"); return -EINVAL; } @@ -1747,7 +1748,7 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr)); } - err = qede_flow_parse_ports(edev, rule, t); + err = qede_flow_parse_ports(rule, t, NULL); if (err) return err; @@ -1774,7 +1775,7 @@ qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, t->dst_ipv4 = match.key->dst; } - err = qede_flow_parse_ports(edev, rule, t); + err = qede_flow_parse_ports(rule, t, NULL); if (err) return err; -- cgit v1.2.3 From 6f88f1257a40edc4892427a84a2d482aa0dadb0e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:50 +0000 Subject: net: qede: use extack in qede_set_v6_tuple_to_profile() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_set_v6_tuple_to_profile() to take extack, and drop the edev argument. Convert DP_INFO call to use NL_SET_ERR_MSG_MOD instead. In calls to qede_set_v6_tuple_to_profile(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-3-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 600b7dc7ad56..0f951d00b10e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1549,9 +1549,9 @@ static int qede_set_v4_tuple_to_profile(struct qede_dev *edev, return 0; } -static int qede_set_v6_tuple_to_profile(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct in6_addr *zaddr) +static int qede_set_v6_tuple_to_profile(struct qede_arfs_tuple *t, + struct in6_addr *zaddr, + struct netlink_ext_ack *extack) { /* We must have Only 4-tuples/l4 port/src ip/dst ip * as an input. 
@@ -1573,7 +1573,7 @@ static int qede_set_v6_tuple_to_profile(struct qede_dev *edev, !memcmp(&t->src_ipv6, zaddr, sizeof(struct in6_addr))) { t->mode = QED_FILTER_CONFIG_MODE_IP_DEST; } else { - DP_INFO(edev, "Invalid N-tuple\n"); + NL_SET_ERR_MSG_MOD(extack, "Invalid N-tuple"); return -EOPNOTSUPP; } @@ -1752,7 +1752,7 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, if (err) return err; - return qede_set_v6_tuple_to_profile(edev, t, &zero_addr); + return qede_set_v6_tuple_to_profile(t, &zero_addr, NULL); } static int -- cgit v1.2.3 From f63a9dc507f9f31f1325cb42b21bea64fda5a525 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:51 +0000 Subject: net: qede: use extack in qede_set_v4_tuple_to_profile() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_set_v4_tuple_to_profile() to take extack, and drop the edev argument. Convert DP_INFO call to use NL_SET_ERR_MSG_MOD instead. In calls to qede_set_v4_tuple_to_profile(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 0f951d00b10e..6f4c4a5d6c77 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1520,8 +1520,8 @@ static int qede_flow_spec_validate_unused(struct qede_dev *edev, return 0; } -static int qede_set_v4_tuple_to_profile(struct qede_dev *edev, - struct qede_arfs_tuple *t) +static int qede_set_v4_tuple_to_profile(struct qede_arfs_tuple *t, + struct netlink_ext_ack *extack) { /* We must have Only 4-tuples/l4 port/src ip/dst ip * as an input. @@ -1538,7 +1538,7 @@ static int qede_set_v4_tuple_to_profile(struct qede_dev *edev, t->dst_ipv4 && !t->src_ipv4) { t->mode = QED_FILTER_CONFIG_MODE_IP_DEST; } else { - DP_INFO(edev, "Invalid N-tuple\n"); + NL_SET_ERR_MSG_MOD(extack, "Invalid N-tuple"); return -EOPNOTSUPP; } @@ -1779,7 +1779,7 @@ qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, if (err) return err; - return qede_set_v4_tuple_to_profile(edev, t); + return qede_set_v4_tuple_to_profile(t, NULL); } static int -- cgit v1.2.3 From a62944d11ae1538f2676f1ae3bef425ca124be39 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:52 +0000 Subject: net: qede: use extack in qede_flow_parse_v6_common() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_v6_common() to take extack, and drop the edev argument. Convert DP_NOTICE call to use NL_SET_ERR_MSG_MOD instead. Pass extack in calls to qede_flow_parse_ports() and qede_set_v6_tuple_to_profile(). In calls to qede_flow_parse_v6_common(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-5-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 6f4c4a5d6c77..f5f8bbe8a64c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1722,8 +1722,9 @@ qede_flow_parse_ports(struct flow_rule *rule, struct qede_arfs_tuple *t, } static int -qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *t) +qede_flow_parse_v6_common(struct flow_rule *rule, + struct qede_arfs_tuple *t, + struct netlink_ext_ack *extack) { struct in6_addr zero_addr, addr; int err; @@ -1739,8 +1740,8 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, memcmp(&match.mask->src, &addr, sizeof(addr))) || (memcmp(&match.key->dst, &zero_addr, sizeof(addr)) && memcmp(&match.mask->dst, &addr, sizeof(addr)))) { - DP_NOTICE(edev, - "Do not support IPv6 address prefix/mask\n"); + NL_SET_ERR_MSG_MOD(extack, + "Do not support IPv6 address prefix/mask"); return -EINVAL; } @@ -1748,11 +1749,11 @@ qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr)); } - err = qede_flow_parse_ports(rule, t, NULL); + err = qede_flow_parse_ports(rule, t, extack); if (err) return err; - return qede_set_v6_tuple_to_profile(t, &zero_addr, NULL); + return qede_set_v6_tuple_to_profile(t, &zero_addr, extack); } static int @@ -1789,7 +1790,7 @@ qede_flow_parse_tcp_v6(struct qede_dev *edev, struct flow_rule *rule, tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_flow_parse_v6_common(edev, rule, tuple); + return qede_flow_parse_v6_common(rule, tuple, NULL); } static int @@ -1809,7 +1810,7 @@ qede_flow_parse_udp_v6(struct qede_dev *edev, struct flow_rule *rule, tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_flow_parse_v6_common(edev, rule, tuple); + return qede_flow_parse_v6_common(rule, tuple, NULL); } static int -- cgit v1.2.3 From f2f993835b26e9f6f8e8e2a3e0a3dcab1db2e73c Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:53 +0000 Subject: net: qede: use extack in qede_flow_parse_v4_common() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_v4_common() to take extack, and drop the edev argument. Convert DP_NOTICE call to use NL_SET_ERR_MSG_MOD instead. Pass extack in calls to qede_flow_parse_ports() and qede_set_v4_tuple_to_profile(). In calls to qede_flow_parse_v4_common(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-6-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f5f8bbe8a64c..28e4d54dbca1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1757,8 +1757,9 @@ qede_flow_parse_v6_common(struct flow_rule *rule, } static int -qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *t) +qede_flow_parse_v4_common(struct flow_rule *rule, + struct qede_arfs_tuple *t, + struct netlink_ext_ack *extack) { int err; @@ -1768,7 +1769,8 @@ qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, flow_rule_match_ipv4_addrs(rule, &match); if ((match.key->src && match.mask->src != htonl(U32_MAX)) || (match.key->dst && match.mask->dst != htonl(U32_MAX))) { - DP_NOTICE(edev, "Do not support ipv4 prefix/masks\n"); + NL_SET_ERR_MSG_MOD(extack, + "Do not support ipv4 prefix/masks"); return -EINVAL; } @@ -1776,11 +1778,11 @@ qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, t->dst_ipv4 = match.key->dst; } - err = qede_flow_parse_ports(rule, t, NULL); + err = qede_flow_parse_ports(rule, t, extack); if (err) return err; - return qede_set_v4_tuple_to_profile(t, NULL); + return qede_set_v4_tuple_to_profile(t, extack); } static int @@ -1800,7 +1802,7 @@ qede_flow_parse_tcp_v4(struct qede_dev *edev, struct flow_rule *rule, tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IP); - return qede_flow_parse_v4_common(edev, rule, tuple); + return qede_flow_parse_v4_common(rule, tuple, NULL); } static int @@ -1820,7 +1822,7 @@ qede_flow_parse_udp_v4(struct qede_dev *edev, struct flow_rule *rule, tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IP); - return qede_flow_parse_v4_common(edev, rule, tuple); + return qede_flow_parse_v4_common(rule, tuple, NULL); } static int -- cgit v1.2.3 From b1a18d5781d4df51ffb2988f91db9faf471e7a1c Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:54 +0000 Subject: net: qede: use extack in qede_flow_parse_tcp_v6() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_tcp_v6() to take extack, and drop the edev argument. Pass extack in call to qede_flow_parse_v6_common(). In call to qede_flow_parse_tcp_v6(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-7-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 28e4d54dbca1..ddf1d6b0fc83 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1786,13 +1786,13 @@ qede_flow_parse_v4_common(struct flow_rule *rule, } static int -qede_flow_parse_tcp_v6(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *tuple) +qede_flow_parse_tcp_v6(struct flow_rule *rule, struct qede_arfs_tuple *tuple, + struct netlink_ext_ack *extack) { tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_flow_parse_v6_common(rule, tuple, NULL); + return qede_flow_parse_v6_common(rule, tuple, extack); } static int @@ -1862,7 +1862,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP)) rc = qede_flow_parse_tcp_v4(edev, rule, tuple); else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6)) - rc = qede_flow_parse_tcp_v6(edev, rule, tuple); + rc = qede_flow_parse_tcp_v6(rule, tuple, NULL); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) rc = qede_flow_parse_udp_v4(edev, rule, tuple); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6)) -- cgit v1.2.3 From f84d52776ccf4d72a1936940019937303ef5dfcd Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:55 +0000 Subject: net: qede: use extack in qede_flow_parse_tcp_v4() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_tcp_v4() to take extack, and drop the edev argument. Pass extack in call to qede_flow_parse_v4_common(). In call to qede_flow_parse_tcp_v4(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-8-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index ddf1d6b0fc83..9fd08ee252ae 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1796,13 +1796,13 @@ qede_flow_parse_tcp_v6(struct flow_rule *rule, struct qede_arfs_tuple *tuple, } static int -qede_flow_parse_tcp_v4(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *tuple) +qede_flow_parse_tcp_v4(struct flow_rule *rule, struct qede_arfs_tuple *tuple, + struct netlink_ext_ack *extack) { tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IP); - return qede_flow_parse_v4_common(rule, tuple, NULL); + return qede_flow_parse_v4_common(rule, tuple, extack); } static int @@ -1860,7 +1860,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, } if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP)) - rc = qede_flow_parse_tcp_v4(edev, rule, tuple); + rc = qede_flow_parse_tcp_v4(rule, tuple, NULL); else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6)) rc = qede_flow_parse_tcp_v6(rule, tuple, NULL); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) -- cgit v1.2.3 From b73ad5c7a72ebc769cef909058b29f993461dcd2 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:56 +0000 Subject: net: qede: use extack in qede_flow_parse_udp_v6() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_udp_v6() to take extack, and drop the edev argument. Pass extack in call to qede_flow_parse_v6_common(). In call to qede_flow_parse_udp_v6(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-9-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9fd08ee252ae..abeb873f58f3 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1806,13 +1806,13 @@ qede_flow_parse_tcp_v4(struct flow_rule *rule, struct qede_arfs_tuple *tuple, } static int -qede_flow_parse_udp_v6(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *tuple) +qede_flow_parse_udp_v6(struct flow_rule *rule, struct qede_arfs_tuple *tuple, + struct netlink_ext_ack *extack) { tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_flow_parse_v6_common(rule, tuple, NULL); + return qede_flow_parse_v6_common(rule, tuple, extack); } static int @@ -1866,7 +1866,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) rc = qede_flow_parse_udp_v4(edev, rule, tuple); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6)) - rc = qede_flow_parse_udp_v6(edev, rule, tuple); + rc = qede_flow_parse_udp_v6(rule, tuple, NULL); else DP_NOTICE(edev, "Invalid protocol request\n"); -- cgit v1.2.3 From 9c8f5ed8849cec7ea9973cd25a53218e0d20a8a9 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:57 +0000 Subject: net: qede: use extack in qede_flow_parse_udp_v4() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_flow_parse_udp_v4() to take extack, and drop the edev argument. Pass extack in call to qede_flow_parse_v4_common(). In call to qede_flow_parse_udp_v4(), use NULL as extack for now, until a subsequent patch makes extack available. Only compile tested. 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-10-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index abeb873f58f3..69dbd615b653 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1816,13 +1816,13 @@ qede_flow_parse_udp_v6(struct flow_rule *rule, struct qede_arfs_tuple *tuple, } static int -qede_flow_parse_udp_v4(struct qede_dev *edev, struct flow_rule *rule, - struct qede_arfs_tuple *tuple) +qede_flow_parse_udp_v4(struct flow_rule *rule, struct qede_arfs_tuple *tuple, + struct netlink_ext_ack *extack) { tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IP); - return qede_flow_parse_v4_common(rule, tuple, NULL); + return qede_flow_parse_v4_common(rule, tuple, extack); } static int @@ -1864,7 +1864,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6)) rc = qede_flow_parse_tcp_v6(rule, tuple, NULL); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) - rc = qede_flow_parse_udp_v4(edev, rule, tuple); + rc = qede_flow_parse_udp_v4(rule, tuple, NULL); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6)) rc = qede_flow_parse_udp_v6(rule, tuple, NULL); else -- cgit v1.2.3 From f833a6555e9eca9f613be115e88167aebc251d74 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:58 +0000 Subject: net: qede: add extack in qede_add_tc_flower_fltr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define extack locally, to reduce line lengths and aid future users. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-11-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 69dbd615b653..09ffcb86924b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1876,6 +1876,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { + struct netlink_ext_ack *extack = f->common.extack; struct qede_arfs_fltr_node *n; struct qede_arfs_tuple t; int min_hlen, rc; @@ -1903,7 +1904,7 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse tc actions and get the vf_id */ - rc = qede_parse_actions(edev, &f->rule->action, f->common.extack); + rc = qede_parse_actions(edev, &f->rule->action, extack); if (rc) goto unlock; -- cgit v1.2.3 From d6883bceb2541209b348f93eb38ed7f134693fc0 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:33:59 +0000 Subject: net: qede: use extack in qede_parse_flow_attr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert qede_parse_flow_attr() to take extack, and drop the edev argument. Convert DP_NOTICE calls to use NL_SET_ERR_MSG_* instead. 
Pass extack in calls to qede_flow_parse_{tcp,udp}_v{4,6}(). In calls to qede_parse_flow_attr(), if extack is unavailable, then use NULL for now, until a subsequent patch makes extack available. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-12-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 09ffcb86924b..8c1c15b73125 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1826,8 +1826,9 @@ qede_flow_parse_udp_v4(struct flow_rule *rule, struct qede_arfs_tuple *tuple, } static int -qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, - struct flow_rule *rule, struct qede_arfs_tuple *tuple) +qede_parse_flow_attr(__be16 proto, struct flow_rule *rule, + struct qede_arfs_tuple *tuple, + struct netlink_ext_ack *extack) { struct flow_dissector *dissector = rule->match.dissector; int rc = -EINVAL; @@ -1841,14 +1842,15 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_PORTS))) { - DP_NOTICE(edev, "Unsupported key set:0x%llx\n", - dissector->used_keys); + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported key used: 0x%llx", + dissector->used_keys); return -EOPNOTSUPP; } if (proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6)) { - DP_NOTICE(edev, "Unsupported proto=0x%x\n", proto); + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported proto=0x%x", + proto); return -EPROTONOSUPPORT; } @@ -1860,15 +1862,15 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, } if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP)) - rc = qede_flow_parse_tcp_v4(rule, tuple, NULL); + rc = qede_flow_parse_tcp_v4(rule, tuple, extack); else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6)) - rc = qede_flow_parse_tcp_v6(rule, tuple, NULL); + rc = qede_flow_parse_tcp_v6(rule, tuple, extack); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) - rc = qede_flow_parse_udp_v4(rule, tuple, NULL); + rc = qede_flow_parse_udp_v4(rule, tuple, extack); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6)) - rc = qede_flow_parse_udp_v6(rule, tuple, NULL); + rc = qede_flow_parse_udp_v6(rule, tuple, extack); else - DP_NOTICE(edev, "Invalid protocol request\n"); + NL_SET_ERR_MSG_MOD(extack, "Invalid protocol request"); return rc; } @@ -1889,7 +1891,7 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - rc = qede_parse_flow_attr(edev, proto, f->rule, &t); + rc = qede_parse_flow_attr(proto, f->rule, &t, extack); if (rc) goto unlock; @@ -2015,7 +2017,7 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - err = qede_parse_flow_attr(edev, proto, flow->rule, t); + err = qede_parse_flow_attr(proto, flow->rule, t, NULL); if (err) goto err_out; -- cgit v1.2.3 From eb705d7345255316f3a25ba60c4c7ee215c92c1a Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:34:00 +0000 Subject: net: qede: use faked extack in qede_flow_spec_to_rule() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 
qede_parse_flow_attr() now does error reporting through extack, then give it a fake extack and extract the error message afterwards if one was set. The extracted error message is then passed on through DP_NOTICE(), including messages that was earlier issued with DP_INFO(). This fake extack approach is already used by mlxsw_env_linecard_modules_power_mode_apply() in drivers/net/ethernet/mellanox/mlxsw/core_env.c Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-13-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 8c1c15b73125..b83432744a03 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1990,6 +1990,7 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, { struct ethtool_rx_flow_spec_input input = {}; struct ethtool_rx_flow_rule *flow; + struct netlink_ext_ack extack; __be16 proto; int err; @@ -2017,7 +2018,7 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - err = qede_parse_flow_attr(proto, flow->rule, t, NULL); + err = qede_parse_flow_attr(proto, flow->rule, t, &extack); if (err) goto err_out; @@ -2025,6 +2026,8 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, err = qede_flow_spec_validate(edev, &flow->rule->action, t, fs->location); err_out: + if (extack._msg) + DP_NOTICE(edev, "%s\n", extack._msg); ethtool_rx_flow_rule_destroy(flow); return err; -- cgit v1.2.3 From d2a437efd017d4d515917a8c8c5439f412b0043c Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:34:01 +0000 Subject: net: qede: propagate extack through qede_flow_spec_validate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass extack to qede_flow_spec_validate() when called in qede_flow_spec_to_rule(). Pass extack to qede_parse_actions(). Not converting qede_flow_spec_validate() to use extack for errors, as it's only called from qede_flow_spec_to_rule(), where extack is faked into a DP_NOTICE anyway, so opting to keep DP_VERBOSE/DP_NOTICE usage. Only compile tested. 
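The faked-extack trick referred to above has the following general shape (illustrative sketch only; the callee name is hypothetical):

| struct netlink_ext_ack extack = {};
| int err;
|
| /* the callee records its error text in extack instead of printing it */
| err = qede_validate_something(edev, &extack);	/* hypothetical callee */
| if (extack._msg)
| 	DP_NOTICE(edev, "%s\n", extack._msg);
|
| return err;

In a sketch like this the on-stack extack is zero-initialised so that a stale ->_msg pointer is never dereferenced when the callee reports no error.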
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-14-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index b83432744a03..e616855d8891 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1953,7 +1953,8 @@ unlock: static int qede_flow_spec_validate(struct qede_dev *edev, struct flow_action *flow_action, struct qede_arfs_tuple *t, - __u32 location) + __u32 location, + struct netlink_ext_ack *extack) { int err; @@ -1977,7 +1978,7 @@ static int qede_flow_spec_validate(struct qede_dev *edev, return -EINVAL; } - err = qede_parse_actions(edev, flow_action, NULL); + err = qede_parse_actions(edev, flow_action, extack); if (err) return err; @@ -2024,7 +2025,7 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, /* Make sure location is valid and filter isn't already set */ err = qede_flow_spec_validate(edev, &flow->rule->action, t, - fs->location); + fs->location, &extack); err_out: if (extack._msg) DP_NOTICE(edev, "%s\n", extack._msg); -- cgit v1.2.3 From 841548793bd6a4f394034137e65d9c221492ff5e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Wed, 8 May 2024 14:34:02 +0000 Subject: net: qede: use extack in qede_parse_actions() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert DP_NOTICE/DP_INFO to NL_SET_ERR_MSG_MOD. Keep edev around for use with QEDE_RSS_COUNT(). Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240508143404.95901-15-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index e616855d8891..9c72febc6a42 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1671,7 +1671,7 @@ static int qede_parse_actions(struct qede_dev *edev, int i; if (!flow_action_has_entries(flow_action)) { - DP_NOTICE(edev, "No actions received\n"); + NL_SET_ERR_MSG_MOD(extack, "No actions received"); return -EINVAL; } @@ -1687,7 +1687,8 @@ static int qede_parse_actions(struct qede_dev *edev, break; if (act->queue.index >= QEDE_RSS_COUNT(edev)) { - DP_INFO(edev, "Queue out-of-bounds\n"); + NL_SET_ERR_MSG_MOD(extack, + "Queue out-of-bounds"); return -EINVAL; } break; -- cgit v1.2.3 From 62a261f6c1dda0d0b26918cf31da8053c846a0a5 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Sun, 12 May 2024 13:46:33 +0000 Subject: net: ena: Add a counter for driver's reset failures This patch adds a counter to the ena_adapter struct in order to keep track of reset failures. The counter is incremented every time either ena_restore_device() or ena_destroy_device() fail. 
Signed-off-by: Osama Abboud Signed-off-by: David Arinzon Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512134637.25299-2-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 18 ++++++++++++------ drivers/net/ethernet/amazon/ena/ena_netdev.h | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 0cb6cc1cef56..28583db848e2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -49,6 +49,7 @@ static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(interface_up), ENA_STAT_GLOBAL_ENTRY(interface_down), ENA_STAT_GLOBAL_ENTRY(admin_q_pause), + ENA_STAT_GLOBAL_ENTRY(reset_fail), }; static const struct ena_stats ena_stats_eni_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 28eaedaf713d..6a9d1b6d91c9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -42,7 +42,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); static void check_for_admin_com_state(struct ena_adapter *adapter); -static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); +static int ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) @@ -3235,14 +3235,15 @@ err_disable_msix: return rc; } -static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) +static int ena_destroy_device(struct ena_adapter *adapter, bool graceful) { struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; + int rc = 0; if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) - return; + return 0; netif_carrier_off(netdev); @@ -3260,7 +3261,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) * and device is up, ena_down() already reset the device. 
*/ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) - ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); + rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); ena_free_mgmnt_irq(adapter); @@ -3279,6 +3280,8 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + return rc; } static int ena_restore_device(struct ena_adapter *adapter) @@ -3355,14 +3358,17 @@ err: static void ena_fw_reset_device(struct work_struct *work) { + int rc = 0; + struct ena_adapter *adapter = container_of(work, struct ena_adapter, reset_task); rtnl_lock(); if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - ena_destroy_device(adapter, false); - ena_restore_device(adapter); + rc |= ena_destroy_device(adapter, false); + rc |= ena_restore_device(adapter); + adapter->dev_stats.reset_fail += !!rc; dev_err(&adapter->pdev->dev, "Device reset completed successfully\n"); } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 6d2cc20210cc..d59509747d1a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -290,6 +290,7 @@ struct ena_stats_dev { u64 admin_q_pause; u64 rx_drops; u64 tx_drops; + u64 reset_fail; }; enum ena_flags_t { -- cgit v1.2.3 From 48673ef444317f44c80da4fe98442dd56ed78cac Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Sun, 12 May 2024 13:46:34 +0000 Subject: net: ena: Reduce holes in ena_com structures This patch makes two changes in order to fill holes and reduce ther overall size of the structures ena_com_dev and ena_com_rx_ctx. Signed-off-by: Shahar Itzko Signed-off-by: David Arinzon Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512134637.25299-3-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_com.h | 4 ++-- drivers/net/ethernet/amazon/ena/ena_eth_com.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index fea57eb8e58b..fdae0cc9ce9b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -305,6 +305,8 @@ struct ena_com_dev { u16 stats_func; /* Selected function for extended statistic dump */ u16 stats_queue; /* Selected queue for extended statistic dump */ + u32 ena_min_poll_delay_us; + struct ena_com_mmio_read mmio_read; struct ena_rss rss; @@ -325,8 +327,6 @@ struct ena_com_dev { struct ena_intr_moder_entry *intr_moder_tbl; struct ena_com_llq_info llq_info; - - u32 ena_min_poll_delay_us; }; struct ena_com_dev_get_features_ctx { diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 72b019758caa..449bc4960ccc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -47,7 +47,7 @@ struct ena_com_rx_ctx { bool frag; u32 hash; u16 descs; - int max_bufs; + u16 max_bufs; u8 pkt_offset; }; -- cgit v1.2.3 From b37b98a3a0c1198bafe8c2d9ce0bc845b4e7a9a7 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Sun, 12 May 2024 13:46:35 +0000 Subject: net: ena: Add validation for completion descriptors consistency Validate that `first` flag is set only for the first descriptor in multi-buffer packets. In case of an invalid descriptor, a reset will occur. 
A new reset reason for RX data corruption has been added. Signed-off-by: Shahar Itzko Signed-off-by: David Arinzon Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512134637.25299-4-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 37 ++++++++++++++++++------- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 ++ drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 1 + 3 files changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 933e619b3a31..4c6e07aa4bbb 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -229,30 +229,43 @@ static struct ena_eth_io_rx_cdesc_base * idx * io_cq->cdesc_entry_size_in_bytes); } -static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, - u16 *first_cdesc_idx) +static int ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx, + u16 *num_descs) { + u16 count = io_cq->cur_rx_pkt_cdesc_count, head_masked; struct ena_eth_io_rx_cdesc_base *cdesc; - u16 count = 0, head_masked; u32 last = 0; do { + u32 status; + cdesc = ena_com_get_next_rx_cdesc(io_cq); if (!cdesc) break; + status = READ_ONCE(cdesc->status); ena_com_cq_inc_head(io_cq); + if (unlikely((status & ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT && count != 0)) { + struct ena_com_dev *dev = ena_com_io_cq_to_ena_dev(io_cq); + + netdev_err(dev->net_device, + "First bit is on in descriptor #%d on q_id: %d, req_id: %u\n", + count, io_cq->qid, cdesc->req_id); + return -EFAULT; + } count++; - last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + last = (status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; } while (!last); if (last) { *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx; - count += io_cq->cur_rx_pkt_cdesc_count; head_masked = io_cq->head & (io_cq->q_depth - 1); + *num_descs = count; io_cq->cur_rx_pkt_cdesc_count = 0; io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; @@ -260,11 +273,11 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, "ENA q_id: %d packets were completed. 
first desc idx %u descs# %d\n", io_cq->qid, *first_cdesc_idx, count); } else { - io_cq->cur_rx_pkt_cdesc_count += count; - count = 0; + io_cq->cur_rx_pkt_cdesc_count = count; + *num_descs = 0; } - return count; + return 0; } static int ena_com_create_meta(struct ena_com_io_sq *io_sq, @@ -539,10 +552,14 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, u16 cdesc_idx = 0; u16 nb_hw_desc; u16 i = 0; + int rc; WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); - nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx); + rc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx, &nb_hw_desc); + if (unlikely(rc != 0)) + return -EFAULT; + if (nb_hw_desc == 0) { ena_rx_ctx->descs = nb_hw_desc; return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6a9d1b6d91c9..7398bc615866 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1347,6 +1347,8 @@ error: if (rc == -ENOSPC) { ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp); ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS); + } else if (rc == -EFAULT) { + ena_reset_device(adapter, ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED); } else { ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp); diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h index 2c3d6a77ea79..a2efebafd686 100644 --- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -22,6 +22,7 @@ enum ena_regs_reset_reason_types { ENA_REGS_RESET_GENERIC = 13, ENA_REGS_RESET_MISS_INTERRUPT = 14, ENA_REGS_RESET_SUSPECTED_POLL_STARVATION = 15, + ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED = 16, }; /* ena_registers offsets */ -- cgit v1.2.3 From 97776caf6c6e3a932d8e1410e6810cbfcb69d42d Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Sun, 12 May 2024 13:46:36 +0000 Subject: net: ena: Changes around strscpy calls strscpy copies as much of the string as possible, meaning that the destination string will be truncated in case of no space. As this is a non-critical error in our case, adding a debug level print for indication. This patch also removes a -1 which was added to ensure enough space for NUL, but strscpy destination string is guaranteed to be NUL-terminted, therefore, the -1 is not needed. 
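For reference, a condensed sketch of the strscpy() contract these checks rely on (illustrative, not from the patch):

| char buf[16];
| const char *src = "a string that may be longer than the buffer";
| ssize_t ret;
|
| /* copies as much of src as fits and always NUL-terminates buf */
| ret = strscpy(buf, src, sizeof(buf));
| if (ret < 0)		/* -E2BIG: src did not fit and was truncated */
| 	pr_debug("input truncated to %zu bytes\n", sizeof(buf) - 1);
| else			/* on success: number of characters copied, NUL excluded */
| 	pr_debug("copied %zd characters\n", ret);

Because the destination is always terminated, passing the full sizeof() of the buffer is correct; reserving an extra byte with "- 1" only shrinks the usable space.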
Signed-off-by: David Arinzon Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512134637.25299-5-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 16 ++++++++++++---- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 28583db848e2..b24cc3f05248 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -460,10 +460,18 @@ static void ena_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct ena_adapter *adapter = netdev_priv(dev); - - strscpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); - strscpy(info->bus_info, pci_name(adapter->pdev), - sizeof(info->bus_info)); + ssize_t ret = 0; + + ret = strscpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + if (ret < 0) + netif_dbg(adapter, drv, dev, + "module name will be truncated, status = %zd\n", ret); + + ret = strscpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); + if (ret < 0) + netif_dbg(adapter, drv, dev, + "bus info will be truncated, status = %zd\n", ret); } static void ena_get_ringparam(struct net_device *netdev, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7398bc615866..184b6e6cbed4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2703,6 +2703,7 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd { struct device *dev = &pdev->dev; struct ena_admin_host_info *host_info; + ssize_t ret; int rc; /* Allocate only the host info */ @@ -2717,11 +2718,19 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd host_info->bdf = pci_dev_id(pdev); host_info->os_type = ENA_ADMIN_OS_LINUX; host_info->kernel_ver = LINUX_VERSION_CODE; - strscpy(host_info->kernel_ver_str, utsname()->version, - sizeof(host_info->kernel_ver_str) - 1); + ret = strscpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str)); + if (ret < 0) + dev_dbg(dev, + "kernel version string will be truncated, status = %zd\n", ret); + host_info->os_dist = 0; - strscpy(host_info->os_dist_str, utsname()->release, - sizeof(host_info->os_dist_str)); + ret = strscpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str)); + if (ret < 0) + dev_dbg(dev, + "OS distribution string will be truncated, status = %zd\n", ret); + host_info->driver_version = (DRV_MODULE_GEN_MAJOR) | (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | -- cgit v1.2.3 From 1cc0a47daa7a2778ac8ab6e2699b605193602607 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Sun, 12 May 2024 13:46:37 +0000 Subject: net: ena: Change initial rx_usec interval For the purpose of obtaining better CPU utilization, minimum rx moderation interval is set to 20 usec. 
Signed-off-by: Osama Abboud Signed-off-by: David Arinzon Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512134637.25299-6-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_com.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index fdae0cc9ce9b..924f03f5a6c7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -47,7 +47,7 @@ /* ENA adaptive interrupt moderation settings */ #define ENA_INTR_INITIAL_TX_INTERVAL_USECS 64 -#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0 +#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 20 #define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1 #define ENA_HASH_KEY_SIZE 40 -- cgit v1.2.3 From e0e6adfe8c20f1b633017e4dafec1b06117da2df Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 12 May 2024 15:43:03 +0300 Subject: net/mlx5: Enable 8 ports LAG This patch adds to mlx5 drivers support for 8 ports HCAs. Starting with ConnectX-8 HCAs with 8 ports are possible. As most driver parts aren't affected by such configuration most driver code is unchanged. Specially the only affected areas are: - Lag - Multiport E-Switch - Single FDB E-Switch All of the above are already factored in generic way, and LAG and VF LAG are tested, so all that left is to change a #define and remove checks which are no longer needed. However, Multiport E-Switch is not tested yet, so it is left untouched. This patch will allow to create hardware LAG/VF LAG when all 8 ports are added to the same bond device. for example, In order to activate the hardware lag a user can execute the following: ip link add bond0 type bond ip link set bond0 type bond miimon 100 mode 2 ip link set eth2 master bond0 ip link set eth3 master bond0 ip link set eth4 master bond0 ip link set eth5 master bond0 ip link set eth6 master bond0 ip link set eth7 master bond0 ip link set eth8 master bond0 ip link set eth9 master bond0 Where eth2, eth3, eth4, eth5, eth6, eth7, eth8 and eth9 are the PFs of the same HCA. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512124306.740898-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 3 --- include/linux/mlx5/driver.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 69d482f7c5a2..5e2171ff0a89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -713,7 +713,6 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) return 0; } -#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH @@ -739,8 +738,6 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; - if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) - return false; #else for (i = 0; i < ldev->ports; i++) if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bf9324a31ae9..8218588688b5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -85,7 +85,7 @@ enum mlx5_sqp_t { }; enum { - MLX5_MAX_PORTS = 4, + MLX5_MAX_PORTS = 8, }; enum { -- cgit v1.2.3 From bcee093751f87dff6ace6355da06e62fd04189a3 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 12 May 2024 15:43:04 +0300 Subject: net/mlx5e: Modifying channels number and updating TX queues It is not appropriate for the mlx5e_num_channels_changed function to be called solely for updating the TX queues, even if the channels number has not been changed. Move the code responsible for updating the TC and TX queues from mlx5e_num_channels_changed and produce a new function called mlx5e_update_tc_and_tx_queues. This new function should only be called when the channels number remains unchanged. 
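Sketched usage of the two preactivate callbacks after this change (illustrative; both calls exist in the driver, only the selection is paraphrased here):

| /* TC/TX-queue layout changes only, channel count unchanged */
| err = mlx5e_safe_switch_params(priv, &new_params,
| 			       mlx5e_update_tc_and_tx_queues_ctx, NULL, true);
|
| /* channel count changes (e.g. via ethtool -L), RX queues resized too */
| err = mlx5e_safe_switch_params(priv, &new_params,
| 			       mlx5e_num_channels_changed_ctx, NULL, true);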
Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240512124306.740898-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 95 ++++++++++------------ 3 files changed, 47 insertions(+), 51 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f8bd9dbf59cd..e85fb71bf0b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1102,6 +1102,7 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, void *context, bool reset); int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); +int mlx5e_update_tc_and_tx_queues_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 1eb3a712930b..3320f12ba2db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2292,7 +2292,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) */ err = mlx5e_safe_switch_params(priv, &new_params, - mlx5e_num_channels_changed_ctx, NULL, true); + mlx5e_update_tc_and_tx_queues_ctx, NULL, true); if (!err) priv->tx_ptp_opened = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ffe8919494d5..0a3d1999ede5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3002,7 +3002,28 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) return err; } -static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_max(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = mlx5_comp_vector_get_cpu(mdev, irq); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + +static int mlx5e_update_tc_and_tx_queues(struct mlx5e_priv *priv) { struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; struct net_device *netdev = priv->netdev; @@ -3026,22 +3047,10 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) err = mlx5e_update_tx_netdev_queues(priv); if (err) goto err_tcs; - err = netif_set_real_num_rx_queues(netdev, nch); - if (err) { - netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); - goto err_txqs; - } + mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); return 0; -err_txqs: - /* netif_set_real_num_rx_queues could fail only when nch increased. Only - * one of nch and ntc is changed in this function. 
That means, the call - * to netif_set_real_num_tx_queues below should not fail, because it - * decreases the number of TX queues. - */ - WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs)); - err_tcs: WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, old_tc_to_txq)); @@ -3049,42 +3058,32 @@ err_out: return err; } -static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); - -static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, - struct mlx5e_params *params) -{ - int ix; - - for (ix = 0; ix < params->num_channels; ix++) { - int num_comp_vectors, irq, vec_ix; - struct mlx5_core_dev *mdev; - - mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); - num_comp_vectors = mlx5_comp_vectors_max(mdev); - cpumask_clear(priv->scratchpad.cpumask); - vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); - - for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = mlx5_comp_vector_get_cpu(mdev, irq); - - cpumask_set_cpu(cpu, priv->scratchpad.cpumask); - } - - netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); - } -} +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_tc_and_tx_queues); static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) { u16 count = priv->channels.params.num_channels; + struct net_device *netdev = priv->netdev; + int old_num_rxqs; int err; - err = mlx5e_update_netdev_queues(priv); - if (err) + old_num_rxqs = netdev->real_num_rx_queues; + err = netif_set_real_num_rx_queues(netdev, count); + if (err) { + netdev_warn(netdev, "%s: netif_set_real_num_rx_queues failed, %d\n", + __func__, err); return err; - - mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); + } + err = mlx5e_update_tc_and_tx_queues(priv); + if (err) { + /* mlx5e_update_tc_and_tx_queues can fail if channels or TCs number increases. + * Since channel number changed, it increased. That means, the call to + * netif_set_real_num_rx_queues below should not fail, because it + * decreases the number of RX queues. + */ + WARN_ON_ONCE(netif_set_real_num_rx_queues(netdev, old_num_rxqs)); + return err; + } /* This function may be called on attach, before priv->rx_res is created. */ if (priv->rx_res) { @@ -3617,7 +3616,7 @@ static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1); err = mlx5e_safe_switch_params(priv, &new_params, - mlx5e_num_channels_changed_ctx, NULL, true); + mlx5e_update_tc_and_tx_queues_ctx, NULL, true); if (!err && priv->mqprio_rl) { mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); @@ -3718,10 +3717,8 @@ static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, struct tc_mqprio_qopt_offload *mqprio) { - mlx5e_fp_preactivate preactivate; struct mlx5e_params new_params; struct mlx5e_mqprio_rl *rl; - bool nch_changed; int err; err = mlx5e_mqprio_channel_validate(priv, mqprio); @@ -3735,10 +3732,8 @@ static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, new_params = priv->channels.params; mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); - nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; - preactivate = nch_changed ? 
mlx5e_num_channels_changed_ctx : - mlx5e_update_netdev_queues_ctx; - err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_update_tc_and_tx_queues_ctx, NULL, true); if (err) { if (rl) { mlx5e_mqprio_rl_cleanup(rl); -- cgit v1.2.3 From db5944e16cd80efcbfe0bf1d067fdcc2f710d246 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 12 May 2024 15:43:05 +0300 Subject: net/mlx5: Remove unused msix related exported APIs MSIX irq allocation and free APIs are no longer in use. Hence, remove the dead code. Signed-off-by: Parav Pandit Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20240512124306.740898-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 52 ----------------------- include/linux/mlx5/driver.h | 7 --- 2 files changed, 59 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 6bac8ad70ba6..fb8787e30d3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -507,58 +507,6 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, return irq; } -/** - * mlx5_msix_alloc - allocate msix interrupt - * @dev: mlx5 device from which to request - * @handler: interrupt handler - * @affdesc: affinity descriptor - * @name: interrupt name - * - * Returns: struct msi_map with result encoded. - * Note: the caller must make sure to release the irq by calling - * mlx5_msix_free() if shutdown was initiated. - */ -struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, - irqreturn_t (*handler)(int, void *), - const struct irq_affinity_desc *affdesc, - const char *name) -{ - struct msi_map map; - int err; - - if (!dev->pdev) { - map.virq = 0; - map.index = -EINVAL; - return map; - } - - map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc); - if (!map.virq) - return map; - - err = request_irq(map.virq, handler, 0, name, NULL); - if (err) { - mlx5_core_warn(dev, "err %d\n", err); - pci_msix_free_irq(dev->pdev, map); - map.virq = 0; - map.index = -ENOMEM; - } - return map; -} -EXPORT_SYMBOL(mlx5_msix_alloc); - -/** - * mlx5_msix_free - free a previously allocated msix interrupt - * @dev: mlx5 device associated with interrupt - * @map: map previously returned by mlx5_msix_alloc() - */ -void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) -{ - free_irq(map.virq, NULL); - pci_msix_free_irq(dev->pdev, map); -} -EXPORT_SYMBOL(mlx5_msix_free); - /** * mlx5_irq_release_vector - release one IRQ back to the system. * @irq: the irq to release. 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8218588688b5..0aa15cac0308 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1374,11 +1374,4 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; - -struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, - irqreturn_t (*handler)(int, void *), - const struct irq_affinity_desc *affdesc, - const char *name); -void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map); - #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 486ffc33c2dd9c611f14adc4b1a5c1dc1f02fd30 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 11 May 2024 07:37:03 +0000 Subject: net: qede: flower: validate control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver currently doesn't support any control flags. Use flow_rule_match_has_control_flags() to check for control flags, such as can be set through `tc flower ... ip_flags frag`. In case any control flags are masked, flow_rule_match_has_control_flags() sets a NL extended error message, and we return -EOPNOTSUPP. Only compile-tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240511073705.230507-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 9c72febc6a42..985026dd816f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1848,6 +1848,9 @@ qede_parse_flow_attr(__be16 proto, struct flow_rule *rule, return -EOPNOTSUPP; } + if (flow_rule_match_has_control_flags(rule, extack)) + return -EOPNOTSUPP; + if (proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6)) { NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported proto=0x%x", -- cgit v1.2.3 From d5c50937d50f396ef97817e30361827ac5b166c2 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 13 May 2024 09:25:13 +0200 Subject: net: stmmac: Add dedicated XPCS cleanup method Currently the XPCS handler destruction is performed in the stmmac_mdio_unregister() method. It doesn't look good because the handler isn't originally created in the corresponding protagonist stmmac_mdio_unregister(), but in the stmmac_xpcs_setup() function. In order to have more coherent MDIO and XPCS setup/cleanup procedures, let's move the DW XPCS destruction to the dedicated stmmac_pcs_clean() method. This method will also be used to cleanup PCS hardware using the pcs_exit() callback that will be introduced to stmmac in a subsequent patch. 
Signed-off-by: Serge Semin Co-developed-by: Romain Gantois Signed-off-by: Romain Gantois Reviewed-by: Russell King (Oracle) Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-2-6acf58b5440d@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 14 +++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index dddcaa9220cc..badfe686a570 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -361,6 +361,7 @@ int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); int stmmac_xpcs_setup(struct mii_bus *mii); +void stmmac_pcs_clean(struct net_device *ndev); void stmmac_set_ethtool_ops(struct net_device *netdev); int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3d828904db0d..0ac99c132733 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7789,8 +7789,9 @@ int stmmac_dvr_probe(struct device *device, error_netdev_register: phylink_destroy(priv->phylink); -error_xpcs_setup: error_phy_setup: + stmmac_pcs_clean(ndev); +error_xpcs_setup: if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); @@ -7832,6 +7833,9 @@ void stmmac_dvr_remove(struct device *dev) if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); reset_control_assert(priv->plat->stmmac_ahb_rst); + + stmmac_pcs_clean(ndev); + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 0542cfd1817e..73ba9901a443 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -523,6 +523,17 @@ int stmmac_xpcs_setup(struct mii_bus *bus) return 0; } +void stmmac_pcs_clean(struct net_device *ndev) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + + if (!priv->hw->xpcs) + return; + + xpcs_destroy(priv->hw->xpcs); + priv->hw->xpcs = NULL; +} + /** * stmmac_mdio_register * @ndev: net device structure @@ -679,9 +690,6 @@ int stmmac_mdio_unregister(struct net_device *ndev) if (!priv->mii) return 0; - if (priv->hw->xpcs) - xpcs_destroy(priv->hw->xpcs); - mdiobus_unregister(priv->mii); priv->mii->priv = NULL; mdiobus_free(priv->mii); -- cgit v1.2.3 From f9cdff1bdacc947bcd182ea69d94b9f5b32ff328 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 13 May 2024 09:25:14 +0200 Subject: net: stmmac: Make stmmac_xpcs_setup() generic to all PCS devices A pcs_init() callback will be introduced to stmmac in a future patch. This new function will be called during the hardware initialization phase. Instead of separately initializing XPCS and PCS components, let's group all PCS-related hardware initialization logic in the current stmmac_xpcs_setup() function. 
Rename stmmac_xpcs_setup() to stmmac_pcs_setup() and move the conditional call to stmmac_xpcs_setup() inside the function itself. Signed-off-by: Serge Semin Co-developed-by: Romain Gantois Signed-off-by: Romain Gantois Reviewed-by: Russell King (Oracle) Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-3-6acf58b5440d@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +++----- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 30 ++++++++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index badfe686a570..ed38099ca740 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -360,7 +360,7 @@ enum stmmac_state { int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); -int stmmac_xpcs_setup(struct mii_bus *mii); +int stmmac_pcs_setup(struct net_device *ndev); void stmmac_pcs_clean(struct net_device *ndev); void stmmac_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0ac99c132733..ef285b3c56ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7754,11 +7754,9 @@ int stmmac_dvr_probe(struct device *device, if (priv->plat->speed_mode_2500) priv->plat->speed_mode_2500(ndev, priv->plat->bsp_priv); - if (priv->plat->mdio_bus_data && priv->plat->mdio_bus_data->has_xpcs) { - ret = stmmac_xpcs_setup(priv->mii); - if (ret) - goto error_xpcs_setup; - } + ret = stmmac_pcs_setup(ndev); + if (ret) + goto error_pcs_setup; ret = stmmac_phy_setup(priv); if (ret) { @@ -7791,7 +7789,7 @@ error_netdev_register: phylink_destroy(priv->phylink); error_phy_setup: stmmac_pcs_clean(ndev); -error_xpcs_setup: +error_pcs_setup: if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 73ba9901a443..54708440e27b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -495,31 +495,37 @@ int stmmac_mdio_reset(struct mii_bus *bus) return 0; } -int stmmac_xpcs_setup(struct mii_bus *bus) +int stmmac_pcs_setup(struct net_device *ndev) { - struct net_device *ndev = bus->priv; + struct dw_xpcs *xpcs = NULL; struct stmmac_priv *priv; - struct dw_xpcs *xpcs; + int ret = -ENODEV; int mode, addr; priv = netdev_priv(ndev); mode = priv->plat->phy_interface; - /* Try to probe the XPCS by scanning all addresses. 
*/ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - xpcs = xpcs_create_mdiodev(bus, addr, mode); - if (IS_ERR(xpcs)) - continue; + if (priv->plat->mdio_bus_data && priv->plat->mdio_bus_data->has_xpcs) { + /* Try to probe the XPCS by scanning all addresses */ + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + xpcs = xpcs_create_mdiodev(priv->mii, addr, mode); + if (IS_ERR(xpcs)) + continue; - priv->hw->xpcs = xpcs; - break; + ret = 0; + break; + } + } else { + return 0; } - if (!priv->hw->xpcs) { + if (ret) { dev_warn(priv->device, "No xPCS found\n"); - return -ENODEV; + return ret; } + priv->hw->xpcs = xpcs; + return 0; } -- cgit v1.2.3 From f0ef433fc264b70a415257dba126acbe3fda666b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 13 May 2024 09:25:15 +0200 Subject: net: stmmac: introduce pcs_init/pcs_exit stmmac operations Introduce a mechanism whereby platforms can create their PCS instances prior to the network device being published to userspace, but after some of the core stmmac initialisation has been completed. This means that the data structures that platforms need will be available. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Reviewed-by: Serge Semin Co-developed-by: Romain Gantois Signed-off-by: Romain Gantois Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-4-6acf58b5440d@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 8 +++++++- include/linux/stmmac.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 54708440e27b..aa43117134d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -505,7 +505,10 @@ int stmmac_pcs_setup(struct net_device *ndev) priv = netdev_priv(ndev); mode = priv->plat->phy_interface; - if (priv->plat->mdio_bus_data && priv->plat->mdio_bus_data->has_xpcs) { + if (priv->plat->pcs_init) { + ret = priv->plat->pcs_init(priv); + } else if (priv->plat->mdio_bus_data && + priv->plat->mdio_bus_data->has_xpcs) { /* Try to probe the XPCS by scanning all addresses */ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { xpcs = xpcs_create_mdiodev(priv->mii, addr, mode); @@ -533,6 +536,9 @@ void stmmac_pcs_clean(struct net_device *ndev) { struct stmmac_priv *priv = netdev_priv(ndev); + if (priv->plat->pcs_exit) + priv->plat->pcs_exit(priv); + if (!priv->hw->xpcs) return; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dfa1828cd756..4a24a246c617 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -285,6 +285,8 @@ struct plat_stmmacenet_data { int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, void *ctx); void (*dump_debug_regs)(void *priv); + int (*pcs_init)(struct stmmac_priv *priv); + void (*pcs_exit)(struct stmmac_priv *priv); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From 81b418a6565757128afc6f74dd997598f8adb1b2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 13 May 2024 09:25:16 +0200 Subject: net: stmmac: dwmac-socfpga: use pcs_init/pcs_exit Use the newly introduced pcs_init() and pcs_exit() operations to create and destroy the PCS instance at a more appropriate moment during the driver lifecycle, thereby avoiding publishing a network device to userspace that has not yet finished its PCS initialisation. 
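As a rough sketch of how a platform glue layer is expected to hook into the new pcs_init()/pcs_exit() callbacks (the foo_* helpers are placeholders rather than real API; the actual socfpga conversion follows in the diff below):

    static int foo_dwmac_pcs_init(struct stmmac_priv *priv)
    {
        struct phylink_pcs *pcs;

        /* placeholder for the platform's PCS constructor */
        pcs = foo_create_pcs(priv->device);
        if (IS_ERR(pcs))
            return PTR_ERR(pcs);

        priv->hw->phylink_pcs = pcs;
        return 0;
    }

    static void foo_dwmac_pcs_exit(struct stmmac_priv *priv)
    {
        if (priv->hw->phylink_pcs)
            foo_destroy_pcs(priv->hw->phylink_pcs); /* placeholder destructor */
    }

    /* in the glue driver's probe routine */
    plat_dat->pcs_init = foo_dwmac_pcs_init;
    plat_dat->pcs_exit = foo_dwmac_pcs_exit;
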
There are other similar issues with this driver which remain unaddressed, but these are out of scope for this patch. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier [rgantois: removed second parameters of new callbacks] Signed-off-by: Romain Gantois Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-5-6acf58b5440d@bootlin.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 107 ++++++++++----------- 1 file changed, 53 insertions(+), 54 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 12b4a80ea3aa..b3d45f9dfb55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -379,6 +379,56 @@ static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) return 0; } +static int socfpga_dwmac_pcs_init(struct stmmac_priv *priv) +{ + struct socfpga_dwmac *dwmac = priv->plat->bsp_priv; + struct regmap_config pcs_regmap_cfg = { + .reg_bits = 16, + .val_bits = 16, + .reg_shift = REGMAP_UPSHIFT(1), + }; + struct mdio_regmap_config mrc; + struct regmap *pcs_regmap; + struct phylink_pcs *pcs; + struct mii_bus *pcs_bus; + + if (!dwmac->tse_pcs_base) + return 0; + + pcs_regmap = devm_regmap_init_mmio(priv->device, dwmac->tse_pcs_base, + &pcs_regmap_cfg); + if (IS_ERR(pcs_regmap)) + return PTR_ERR(pcs_regmap); + + memset(&mrc, 0, sizeof(mrc)); + mrc.regmap = pcs_regmap; + mrc.parent = priv->device; + mrc.valid_addr = 0x0; + mrc.autoscan = false; + + /* Can't use ndev->name here because it will not have been initialised, + * and in any case, the user can rename network interfaces at runtime. 
+ */ + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", + dev_name(priv->device)); + pcs_bus = devm_mdio_regmap_register(priv->device, &mrc); + if (IS_ERR(pcs_bus)) + return PTR_ERR(pcs_bus); + + pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(pcs)) + return PTR_ERR(pcs); + + priv->hw->phylink_pcs = pcs; + return 0; +} + +static void socfpga_dwmac_pcs_exit(struct stmmac_priv *priv) +{ + if (priv->hw->phylink_pcs) + lynx_pcs_destroy(priv->hw->phylink_pcs); +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -426,6 +476,8 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) dwmac->ops = ops; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; + plat_dat->pcs_init = socfpga_dwmac_pcs_init; + plat_dat->pcs_exit = socfpga_dwmac_pcs_exit; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -444,48 +496,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) goto err_dvr_remove; - /* Create a regmap for the PCS so that it can be used by the PCS driver, - * if we have such a PCS - */ - if (dwmac->tse_pcs_base) { - struct regmap_config pcs_regmap_cfg; - struct mdio_regmap_config mrc; - struct regmap *pcs_regmap; - struct mii_bus *pcs_bus; - - memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); - memset(&mrc, 0, sizeof(mrc)); - - pcs_regmap_cfg.reg_bits = 16; - pcs_regmap_cfg.val_bits = 16; - pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); - - pcs_regmap = devm_regmap_init_mmio(&pdev->dev, dwmac->tse_pcs_base, - &pcs_regmap_cfg); - if (IS_ERR(pcs_regmap)) { - ret = PTR_ERR(pcs_regmap); - goto err_dvr_remove; - } - - mrc.regmap = pcs_regmap; - mrc.parent = &pdev->dev; - mrc.valid_addr = 0x0; - mrc.autoscan = false; - - snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); - pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); - if (IS_ERR(pcs_bus)) { - ret = PTR_ERR(pcs_bus); - goto err_dvr_remove; - } - - stpriv->hw->phylink_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); - if (IS_ERR(stpriv->hw->phylink_pcs)) { - ret = PTR_ERR(stpriv->hw->phylink_pcs); - goto err_dvr_remove; - } - } - return 0; err_dvr_remove: @@ -494,17 +504,6 @@ err_dvr_remove: return ret; } -static void socfpga_dwmac_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct phylink_pcs *pcs = priv->hw->phylink_pcs; - - stmmac_pltfr_remove(pdev); - - lynx_pcs_destroy(pcs); -} - #ifdef CONFIG_PM_SLEEP static int socfpga_dwmac_resume(struct device *dev) { @@ -576,7 +575,7 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove_new = socfpga_dwmac_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "socfpga-dwmac", .pm = &socfpga_dwmac_pm_ops, -- cgit v1.2.3 From f360446ec1d06df0e7d13db44945a0ffb9f2d17d Mon Sep 17 00:00:00 2001 From: Clément Léger Date: Mon, 13 May 2024 09:25:17 +0200 Subject: net: stmmac: add support for RZ/N1 GMAC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the Renesas RZ/N1 GMAC. This support can make use of a custom RZ/N1 PCS which is fetched by parsing the pcs-handle device tree property. 
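In short, the PCS lookup at the heart of the new glue layer is (excerpted from the new dwmac-rzn1.c file in the diff below):

    pcs_node = of_parse_phandle(np, "pcs-handle", 0);
    if (pcs_node) {
        pcs = miic_create(priv->device, pcs_node); /* RZ/N1 MII converter PCS */
        of_node_put(pcs_node);
        if (IS_ERR(pcs))
            return PTR_ERR(pcs);

        priv->hw->phylink_pcs = pcs;
    }
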
Signed-off-by: Clément Léger Co-developed-by: Romain Gantois Signed-off-by: Romain Gantois Reviewed-by: Russell King (Oracle) Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-6-6acf58b5440d@bootlin.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++ drivers/net/ethernet/stmicro/stmmac/Kconfig | 12 ++++ drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c | 86 ++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c (limited to 'drivers/net/ethernet') diff --git a/MAINTAINERS b/MAINTAINERS index 757206d86048..40eb26e6c60c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18874,6 +18874,12 @@ F: include/dt-bindings/net/pcs-rzn1-miic.h F: include/linux/pcs-rzn1-miic.h F: net/dsa/tag_rzn1_a5psw.c +RENESAS RZ/N1 DWMAC GLUE LAYER +M: Romain Gantois +S: Maintained +F: Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml +F: drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c + RENESAS RZ/N1 RTC CONTROLLER DRIVER M: Miquel Raynal L: linux-rtc@vger.kernel.org diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 4ec61f1ee71a..05cc07b8f48c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -142,6 +142,18 @@ config DWMAC_ROCKCHIP This selects the Rockchip RK3288 SoC glue layer support for the stmmac device driver. +config DWMAC_RZN1 + tristate "Renesas RZ/N1 dwmac support" + default ARCH_RZN1 + depends on OF && (ARCH_RZN1 || COMPILE_TEST) + select PCS_RZN1_MIIC + help + Support for Ethernet controller on Renesas RZ/N1 SoC family. + + This selects the Renesas RZ/N1 SoC glue layer support for + the stmmac device driver. This support can make use of a custom MII + converter PCS device. 
+ config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" default ARCH_INTEL_SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 26cad4344701..c2f0e91f6bf8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_DWMAC_MEDIATEK) += dwmac-mediatek.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o +obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c new file mode 100644 index 000000000000..848cf3c01f4a --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rzn1.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2024 Schneider-Electric + * + * Clément Léger + */ + +#include +#include +#include +#include + +#include "stmmac_platform.h" +#include "stmmac.h" + +static int rzn1_dwmac_pcs_init(struct stmmac_priv *priv) +{ + struct device_node *np = priv->device->of_node; + struct device_node *pcs_node; + struct phylink_pcs *pcs; + + pcs_node = of_parse_phandle(np, "pcs-handle", 0); + + if (pcs_node) { + pcs = miic_create(priv->device, pcs_node); + of_node_put(pcs_node); + if (IS_ERR(pcs)) + return PTR_ERR(pcs); + + priv->hw->phylink_pcs = pcs; + } + + return 0; +} + +static void rzn1_dwmac_pcs_exit(struct stmmac_priv *priv) +{ + if (priv->hw->phylink_pcs) + miic_destroy(priv->hw->phylink_pcs); +} + +static int rzn1_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + plat_dat->bsp_priv = plat_dat; + plat_dat->pcs_init = rzn1_dwmac_pcs_init; + plat_dat->pcs_exit = rzn1_dwmac_pcs_exit; + + ret = stmmac_dvr_probe(dev, plat_dat, &stmmac_res); + if (ret) + return ret; + + return 0; +} + +static const struct of_device_id rzn1_dwmac_match[] = { + { .compatible = "renesas,rzn1-gmac" }, + { } +}; +MODULE_DEVICE_TABLE(of, rzn1_dwmac_match); + +static struct platform_driver rzn1_dwmac_driver = { + .probe = rzn1_dwmac_probe, + .remove_new = stmmac_pltfr_remove, + .driver = { + .name = "rzn1-dwmac", + .of_match_table = rzn1_dwmac_match, + }, +}; +module_platform_driver(rzn1_dwmac_driver); + +MODULE_AUTHOR("Clément Léger "); +MODULE_DESCRIPTION("Renesas RZN1 DWMAC specific glue layer"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 86348d217661c7fe4f043df444658a6a9adcff29 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sun, 12 May 2024 18:10:27 +0200 Subject: net: prestera: Add flex arrays to some structs The "struct prestera_msg_vtcam_rule_add_req" uses a dynamically sized set of trailing elements. Specifically, it uses an array of structures of type "prestera_msg_acl_action actions_msg". The "struct prestera_msg_flood_domain_ports_set_req" also uses a dynamically sized set of trailing elements. Specifically, it uses an array of structures of type "prestera_msg_acl_action actions_msg". So, use the preferred way in the kernel declaring flexible arrays [1]. 
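As a generic illustration of the pattern being adopted here (a hypothetical foo_msg structure, not the driver's actual message layout; it combines the flexible array with the __counted_by_le() annotation and the struct_size() helper discussed below):

    struct foo_msg {
        __le32 n_items;
        struct foo_item items[] __counted_by_le(n_items);
    };

    /* replaces: size = sizeof(*req) + sizeof(*item) * n_items; */
    req = kzalloc(struct_size(req, items, n_items), GFP_KERNEL);
    if (!req)
        return -ENOMEM;

    /* assign the counter before any items[] access */
    req->n_items = __cpu_to_le32(n_items);
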
At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the attribute used is specifically __counted_by_le since the counters are of type __le32. The logic does not need to change since the counters for the flexible arrays are asigned before any access to the arrays. The order in which the structure prestera_msg_vtcam_rule_add_req and the structure prestera_msg_flood_domain_ports_set_req are defined must be changed to avoid incomplete type errors. Also, avoid the open-coded arithmetic in memory allocator functions [2] using the "struct_size" macro. Moreover, the new structure members also allow us to avoid the open- coded arithmetic on pointers. So, take advantage of this refactoring accordingly. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/next/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Signed-off-by: Erick Archer Reviewed-by: Simon Horman Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/AS8PR02MB7237E8469568A59795F1F0408BE12@AS8PR02MB7237.eurprd02.prod.outlook.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/prestera/prestera_hw.c | 83 ++++++++++------------ 1 file changed, 37 insertions(+), 46 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index fc6f7d2746e8..197198ba61b1 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -419,15 +419,6 @@ struct prestera_msg_vtcam_destroy_req { __le32 vtcam_id; }; -struct prestera_msg_vtcam_rule_add_req { - struct prestera_msg_cmd cmd; - __le32 key[__PRESTERA_ACL_RULE_MATCH_TYPE_MAX]; - __le32 keymask[__PRESTERA_ACL_RULE_MATCH_TYPE_MAX]; - __le32 vtcam_id; - __le32 prio; - __le32 n_act; -}; - struct prestera_msg_vtcam_rule_del_req { struct prestera_msg_cmd cmd; __le32 vtcam_id; @@ -471,6 +462,16 @@ struct prestera_msg_acl_action { }; }; +struct prestera_msg_vtcam_rule_add_req { + struct prestera_msg_cmd cmd; + __le32 key[__PRESTERA_ACL_RULE_MATCH_TYPE_MAX]; + __le32 keymask[__PRESTERA_ACL_RULE_MATCH_TYPE_MAX]; + __le32 vtcam_id; + __le32 prio; + __le32 n_act; + struct prestera_msg_acl_action actions_msg[] __counted_by_le(n_act); +}; + struct prestera_msg_counter_req { struct prestera_msg_cmd cmd; __le32 client; @@ -702,12 +703,6 @@ struct prestera_msg_flood_domain_destroy_req { __le32 flood_domain_idx; }; -struct prestera_msg_flood_domain_ports_set_req { - struct prestera_msg_cmd cmd; - __le32 flood_domain_idx; - __le32 ports_num; -}; - struct prestera_msg_flood_domain_ports_reset_req { struct prestera_msg_cmd cmd; __le32 flood_domain_idx; @@ -725,6 +720,13 @@ struct prestera_msg_flood_domain_port { __le16 port_type; }; +struct prestera_msg_flood_domain_ports_set_req { + struct prestera_msg_cmd cmd; + __le32 flood_domain_idx; + __le32 ports_num; + struct prestera_msg_flood_domain_port ports[] __counted_by_le(ports_num); +}; + struct prestera_msg_mdb_create_req { struct prestera_msg_cmd cmd; __le32 flood_domain_idx; @@ -1371,23 
+1373,18 @@ int prestera_hw_vtcam_rule_add(struct prestera_switch *sw, struct prestera_acl_hw_action_info *act, u8 n_act, u32 *rule_id) { - struct prestera_msg_acl_action *actions_msg; struct prestera_msg_vtcam_rule_add_req *req; struct prestera_msg_vtcam_resp resp; - void *buff; - u32 size; + size_t size; int err; u8 i; - size = sizeof(*req) + sizeof(*actions_msg) * n_act; - - buff = kzalloc(size, GFP_KERNEL); - if (!buff) + size = struct_size(req, actions_msg, n_act); + req = kzalloc(size, GFP_KERNEL); + if (!req) return -ENOMEM; - req = buff; req->n_act = __cpu_to_le32(n_act); - actions_msg = buff + sizeof(*req); /* put acl matches into the message */ memcpy(req->key, key, sizeof(req->key)); @@ -1395,7 +1392,7 @@ int prestera_hw_vtcam_rule_add(struct prestera_switch *sw, /* put acl actions into the message */ for (i = 0; i < n_act; i++) { - err = prestera_acl_rule_add_put_action(&actions_msg[i], + err = prestera_acl_rule_add_put_action(&req->actions_msg[i], &act[i]); if (err) goto free_buff; @@ -1411,7 +1408,7 @@ int prestera_hw_vtcam_rule_add(struct prestera_switch *sw, *rule_id = __le32_to_cpu(resp.rule_id); free_buff: - kfree(buff); + kfree(req); return err; } @@ -2461,14 +2458,13 @@ int prestera_hw_flood_domain_ports_set(struct prestera_flood_domain *domain) { struct prestera_flood_domain_port *flood_domain_port; struct prestera_msg_flood_domain_ports_set_req *req; - struct prestera_msg_flood_domain_port *ports; struct prestera_switch *sw = domain->sw; struct prestera_port *port; u32 ports_num = 0; - int buf_size; - void *buff; + size_t buf_size; u16 lag_id; int err; + int i = 0; list_for_each_entry(flood_domain_port, &domain->flood_domain_port_list, flood_domain_port_node) @@ -2477,15 +2473,11 @@ int prestera_hw_flood_domain_ports_set(struct prestera_flood_domain *domain) if (!ports_num) return -EINVAL; - buf_size = sizeof(*req) + sizeof(*ports) * ports_num; - - buff = kmalloc(buf_size, GFP_KERNEL); - if (!buff) + buf_size = struct_size(req, ports, ports_num); + req = kmalloc(buf_size, GFP_KERNEL); + if (!req) return -ENOMEM; - req = buff; - ports = buff + sizeof(*req); - req->flood_domain_idx = __cpu_to_le32(domain->idx); req->ports_num = __cpu_to_le32(ports_num); @@ -2494,31 +2486,30 @@ int prestera_hw_flood_domain_ports_set(struct prestera_flood_domain *domain) if (netif_is_lag_master(flood_domain_port->dev)) { if (prestera_lag_id(sw, flood_domain_port->dev, &lag_id)) { - kfree(buff); + kfree(req); return -EINVAL; } - ports->port_type = + req->ports[i].port_type = __cpu_to_le16(PRESTERA_HW_FLOOD_DOMAIN_PORT_TYPE_LAG); - ports->lag_id = __cpu_to_le16(lag_id); + req->ports[i].lag_id = __cpu_to_le16(lag_id); } else { port = prestera_port_dev_lower_find(flood_domain_port->dev); - ports->port_type = + req->ports[i].port_type = __cpu_to_le16(PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT); - ports->dev_num = __cpu_to_le32(port->dev_id); - ports->port_num = __cpu_to_le32(port->hw_id); + req->ports[i].dev_num = __cpu_to_le32(port->dev_id); + req->ports[i].port_num = __cpu_to_le32(port->hw_id); } - ports->vid = __cpu_to_le16(flood_domain_port->vid); - - ports++; + req->ports[i].vid = __cpu_to_le16(flood_domain_port->vid); + i++; } err = prestera_cmd(sw, PRESTERA_CMD_TYPE_FLOOD_DOMAIN_PORTS_SET, &req->cmd, buf_size); - kfree(buff); + kfree(req); return err; } -- cgit v1.2.3 From 40a1d11fc670ac03c5dc2e5a9724b330e74f38b0 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 13 May 2024 13:29:01 -0700 Subject: net: mana: Enable MANA driver on ARM64 with 4K page size Change the Kconfig dependency, so 
this driver can be built and run on ARM64 with 4K page size. 16/64K page sizes are not supported yet. Signed-off-by: Haiyang Zhang Link: https://lore.kernel.org/r/1715632141-8089-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 01eb7445ead9..286f0d5697a1 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -17,7 +17,8 @@ if NET_VENDOR_MICROSOFT config MICROSOFT_MANA tristate "Microsoft Azure Network Adapter (MANA) support" - depends on PCI_MSI && X86_64 + depends on PCI_MSI + depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN && ARM64_4K_PAGES) depends on PCI_HYPERV select AUXILIARY_BUS select PAGE_POOL -- cgit v1.2.3 From 36ac9e7f2e5786bd37c5cd91132e1f39c29b8197 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 13 May 2024 09:43:45 +0800 Subject: net: stmmac: move the EST lock to struct stmmac_priv Reinitialize the whole EST structure would also reset the mutex lock which is embedded in the EST structure, and then trigger the following warning. To address this, move the lock to struct stmmac_priv. We also need to reacquire the mutex lock when doing this initialization. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 3 PID: 505 at kernel/locking/mutex.c:587 __mutex_lock+0xd84/0x1068 Modules linked in: CPU: 3 PID: 505 Comm: tc Not tainted 6.9.0-rc6-00053-g0106679839f7-dirty #29 Hardware name: NXP i.MX8MPlus EVK board (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __mutex_lock+0xd84/0x1068 lr : __mutex_lock+0xd84/0x1068 sp : ffffffc0864e3570 x29: ffffffc0864e3570 x28: ffffffc0817bdc78 x27: 0000000000000003 x26: ffffff80c54f1808 x25: ffffff80c9164080 x24: ffffffc080d723ac x23: 0000000000000000 x22: 0000000000000002 x21: 0000000000000000 x20: 0000000000000000 x19: ffffffc083bc3000 x18: ffffffffffffffff x17: ffffffc08117b080 x16: 0000000000000002 x15: ffffff80d2d40000 x14: 00000000000002da x13: ffffff80d2d404b8 x12: ffffffc082b5a5c8 x11: ffffffc082bca680 x10: ffffffc082bb2640 x9 : ffffffc082bb2698 x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000000001 x5 : ffffff8178fe0d48 x4 : 0000000000000000 x3 : 0000000000000027 x2 : ffffff8178fe0d50 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: __mutex_lock+0xd84/0x1068 mutex_lock_nested+0x28/0x34 tc_setup_taprio+0x118/0x68c stmmac_setup_tc+0x50/0xf0 taprio_change+0x868/0xc9c Fixes: b2aae654a479 ("net: stmmac: add mutex lock to protect est parameters") Signed-off-by: Xiaolei Wang Reviewed-by: Simon Horman Reviewed-by: Serge Semin Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20240513014346.1718740-2-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 18 ++++++++++-------- include/linux/stmmac.h | 1 - 4 files changed, 16 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index ed38099ca740..044e61a385e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -261,6 +261,8 @@ struct stmmac_priv { struct stmmac_extra_stats xstats 
____cacheline_aligned_in_smp; struct stmmac_safety_stats sstats; struct plat_stmmacenet_data *plat; + /* Protect est parameters */ + struct mutex est_lock; struct dma_features dma_cap; struct stmmac_counters mmc; int hw_cap_support; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e04830a3a1fb..0c5aab6dd7a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -70,11 +70,11 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) /* If EST is enabled, disabled it before adjust ptp time. */ if (priv->plat->est && priv->plat->est->enable) { est_rst = true; - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); } write_lock_irqsave(&priv->ptp_lock, flags); @@ -87,7 +87,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) ktime_t current_time_ns, basetime; u64 cycle_time; - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); time.tv_nsec = priv->plat->est->btr_reserve[0]; @@ -104,7 +104,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) priv->plat->est->enable = true; ret = stmmac_est_configure(priv, priv, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); if (ret) netdev_err(priv->dev, "failed to configure EST\n"); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index cce00719937d..620c16e9be3a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1004,17 +1004,19 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (!plat->est) return -ENOMEM; - mutex_init(&priv->plat->est->lock); + mutex_init(&priv->est_lock); } else { + mutex_lock(&priv->est_lock); memset(plat->est, 0, sizeof(*plat->est)); + mutex_unlock(&priv->est_lock); } size = qopt->num_entries; - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -1045,7 +1047,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -1068,7 +1070,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, tc_taprio_map_maxsdu_txq(priv, qopt); if (fpe && !priv->dma_cap.fpesel) { - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); return -EOPNOTSUPP; } @@ -1079,7 +1081,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, ret = stmmac_est_configure(priv, priv, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -1096,7 +1098,7 @@ static int 
tc_taprio_configure(struct stmmac_priv *priv, disable: if (priv->plat->est) { - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv, priv->plat->est, priv->plat->clk_ptp_rate); @@ -1105,7 +1107,7 @@ disable: priv->xstats.max_sdu_txq_drop[i] = 0; priv->xstats.mtl_est_txq_hlbf[i] = 0; } - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); } priv->plat->fpe_cfg->enable = false; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4a24a246c617..c4caed4eef52 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -117,7 +117,6 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { - struct mutex lock; int enable; u32 btr_reserve[2]; u32 btr_offset[2]; -- cgit v1.2.3 From bd17382ac36ed97848f1712714488e84c93605ca Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 13 May 2024 09:43:46 +0800 Subject: net: stmmac: move the EST structure to struct stmmac_priv Move the EST structure to struct stmmac_priv, because the EST configs don't look like platform config, but EST is enabled in runtime with the settings retrieved for the TC TAPRIO feature also in runtime. So it's better to have the EST-data preserved in the driver private data instead of the platform data storage. Signed-off-by: Xiaolei Wang Reviewed-by: Simon Horman Reviewed-by: Serge Semin Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20240513014346.1718740-3-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 15 +++++++++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++----- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 22 ++++++------- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 40 +++++++++++------------ include/linux/stmmac.h | 15 --------- 5 files changed, 54 insertions(+), 56 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 044e61a385e4..b23b920eedb1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -221,6 +221,20 @@ struct stmmac_dma_conf { unsigned int dma_tx_size; }; +#define EST_GCL 1024 +struct stmmac_est { + int enable; + u32 btr_reserve[2]; + u32 btr_offset[2]; + u32 btr[2]; + u32 ctr[2]; + u32 ter; + u32 gcl_unaligned[EST_GCL]; + u32 gcl[EST_GCL]; + u32 gcl_size; + u32 max_sdu[MTL_MAX_TX_QUEUES]; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; @@ -263,6 +277,7 @@ struct stmmac_priv { struct plat_stmmacenet_data *plat; /* Protect est parameters */ struct mutex est_lock; + struct stmmac_est *est; struct dma_features dma_cap; struct stmmac_counters mmc; int hw_cap_support; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ef285b3c56ab..2e9a2da605f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2498,9 +2498,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) if (!xsk_tx_peek_desc(pool, &xdp_desc)) break; - if (priv->plat->est && priv->plat->est->enable && - priv->plat->est->max_sdu[queue] && - xdp_desc.len > priv->plat->est->max_sdu[queue]) { + if (priv->est && priv->est->enable && + priv->est->max_sdu[queue] && + xdp_desc.len > priv->est->max_sdu[queue]) { 
priv->xstats.max_sdu_txq_drop[queue]++; continue; } @@ -4538,9 +4538,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return stmmac_tso_xmit(skb, dev); } - if (priv->plat->est && priv->plat->est->enable && - priv->plat->est->max_sdu[queue] && - skb->len > priv->plat->est->max_sdu[queue]){ + if (priv->est && priv->est->enable && + priv->est->max_sdu[queue] && + skb->len > priv->est->max_sdu[queue]){ priv->xstats.max_sdu_txq_drop[queue]++; goto max_sdu_err; } @@ -4919,9 +4919,9 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv)) return STMMAC_XDP_CONSUMED; - if (priv->plat->est && priv->plat->est->enable && - priv->plat->est->max_sdu[queue] && - xdpf->len > priv->plat->est->max_sdu[queue]) { + if (priv->est && priv->est->enable && + priv->est->max_sdu[queue] && + xdpf->len > priv->est->max_sdu[queue]) { priv->xstats.max_sdu_txq_drop[queue]++; return STMMAC_XDP_CONSUMED; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 0c5aab6dd7a7..a6b1de9a251d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -68,11 +68,11 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) nsec = reminder; /* If EST is enabled, disabled it before adjust ptp time. */ - if (priv->plat->est && priv->plat->est->enable) { + if (priv->est && priv->est->enable) { est_rst = true; mutex_lock(&priv->est_lock); - priv->plat->est->enable = false; - stmmac_est_configure(priv, priv, priv->plat->est, + priv->est->enable = false; + stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); mutex_unlock(&priv->est_lock); } @@ -90,19 +90,19 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) mutex_lock(&priv->est_lock); priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); - time.tv_nsec = priv->plat->est->btr_reserve[0]; - time.tv_sec = priv->plat->est->btr_reserve[1]; + time.tv_nsec = priv->est->btr_reserve[0]; + time.tv_sec = priv->est->btr_reserve[1]; basetime = timespec64_to_ktime(time); - cycle_time = (u64)priv->plat->est->ctr[1] * NSEC_PER_SEC + - priv->plat->est->ctr[0]; + cycle_time = (u64)priv->est->ctr[1] * NSEC_PER_SEC + + priv->est->ctr[0]; time = stmmac_calc_tas_basetime(basetime, current_time_ns, cycle_time); - priv->plat->est->btr[0] = (u32)time.tv_nsec; - priv->plat->est->btr[1] = (u32)time.tv_sec; - priv->plat->est->enable = true; - ret = stmmac_est_configure(priv, priv, priv->plat->est, + priv->est->btr[0] = (u32)time.tv_nsec; + priv->est->btr[1] = (u32)time.tv_sec; + priv->est->enable = true; + ret = stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); mutex_unlock(&priv->est_lock); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 620c16e9be3a..222540b55480 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -918,7 +918,6 @@ struct timespec64 stmmac_calc_tas_basetime(ktime_t old_base_time, static void tc_taprio_map_maxsdu_txq(struct stmmac_priv *priv, struct tc_taprio_qopt_offload *qopt) { - struct plat_stmmacenet_data *plat = priv->plat; u32 num_tc = qopt->mqprio.qopt.num_tc; u32 offset, count, i, j; @@ -933,7 +932,7 @@ static void tc_taprio_map_maxsdu_txq(struct stmmac_priv *priv, count = 
qopt->mqprio.qopt.count[i]; for (j = offset; j < offset + count; j++) - plat->est->max_sdu[j] = qopt->max_sdu[i] + ETH_HLEN - ETH_TLEN; + priv->est->max_sdu[j] = qopt->max_sdu[i] + ETH_HLEN - ETH_TLEN; } } @@ -941,7 +940,6 @@ static int tc_taprio_configure(struct stmmac_priv *priv, struct tc_taprio_qopt_offload *qopt) { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; - struct plat_stmmacenet_data *plat = priv->plat; struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; bool fpe = false; @@ -998,24 +996,24 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (qopt->cycle_time_extension >= BIT(wid + 7)) return -ERANGE; - if (!plat->est) { - plat->est = devm_kzalloc(priv->device, sizeof(*plat->est), + if (!priv->est) { + priv->est = devm_kzalloc(priv->device, sizeof(*priv->est), GFP_KERNEL); - if (!plat->est) + if (!priv->est) return -ENOMEM; mutex_init(&priv->est_lock); } else { mutex_lock(&priv->est_lock); - memset(plat->est, 0, sizeof(*plat->est)); + memset(priv->est, 0, sizeof(*priv->est)); mutex_unlock(&priv->est_lock); } size = qopt->num_entries; mutex_lock(&priv->est_lock); - priv->plat->est->gcl_size = size; - priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; + priv->est->gcl_size = size; + priv->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; mutex_unlock(&priv->est_lock); for (i = 0; i < size; i++) { @@ -1044,7 +1042,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, return -EOPNOTSUPP; } - priv->plat->est->gcl[i] = delta_ns | (gates << wid); + priv->est->gcl[i] = delta_ns | (gates << wid); } mutex_lock(&priv->est_lock); @@ -1054,18 +1052,18 @@ static int tc_taprio_configure(struct stmmac_priv *priv, time = stmmac_calc_tas_basetime(qopt->base_time, current_time_ns, qopt->cycle_time); - priv->plat->est->btr[0] = (u32)time.tv_nsec; - priv->plat->est->btr[1] = (u32)time.tv_sec; + priv->est->btr[0] = (u32)time.tv_nsec; + priv->est->btr[1] = (u32)time.tv_sec; qopt_time = ktime_to_timespec64(qopt->base_time); - priv->plat->est->btr_reserve[0] = (u32)qopt_time.tv_nsec; - priv->plat->est->btr_reserve[1] = (u32)qopt_time.tv_sec; + priv->est->btr_reserve[0] = (u32)qopt_time.tv_nsec; + priv->est->btr_reserve[1] = (u32)qopt_time.tv_sec; ctr = qopt->cycle_time; - priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); - priv->plat->est->ctr[1] = (u32)ctr; + priv->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); + priv->est->ctr[1] = (u32)ctr; - priv->plat->est->ter = qopt->cycle_time_extension; + priv->est->ter = qopt->cycle_time_extension; tc_taprio_map_maxsdu_txq(priv, qopt); @@ -1079,7 +1077,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, */ priv->plat->fpe_cfg->enable = fpe; - ret = stmmac_est_configure(priv, priv, priv->plat->est, + ret = stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); mutex_unlock(&priv->est_lock); if (ret) { @@ -1097,10 +1095,10 @@ static int tc_taprio_configure(struct stmmac_priv *priv, return 0; disable: - if (priv->plat->est) { + if (priv->est) { mutex_lock(&priv->est_lock); - priv->plat->est->enable = false; - stmmac_est_configure(priv, priv, priv->plat->est, + priv->est->enable = false; + stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); /* Reset taprio status */ for (i = 0; i < priv->plat->tx_queues_to_use; i++) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c4caed4eef52..f92c195c76ed 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -115,20 +115,6 @@ struct stmmac_axi { bool axi_rb; }; -#define EST_GCL 1024 
-struct stmmac_est { - int enable; - u32 btr_reserve[2]; - u32 btr_offset[2]; - u32 btr[2]; - u32 ctr[2]; - u32 ter; - u32 gcl_unaligned[EST_GCL]; - u32 gcl[EST_GCL]; - u32 gcl_size; - u32 max_sdu[MTL_MAX_TX_QUEUES]; -}; - struct stmmac_rxq_cfg { u8 mode_to_use; u32 chan; @@ -245,7 +231,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_est *est; struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; -- cgit v1.2.3
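For reference, a generic illustration of the bug class these last two EST patches address (a hypothetical foo_cfg structure; the real fix keeps the mutex in stmmac_priv so that reinitialising the EST state no longer wipes it):

    struct foo_cfg {
        struct mutex lock;  /* embedded in the structure that gets reinitialised */
        u32 param;
    };

    void foo_reconfigure(struct foo_cfg *cfg)
    {
        memset(cfg, 0, sizeof(*cfg));  /* also clears the mutex, including lock->magic */
        mutex_lock(&cfg->lock);        /* lockdep: DEBUG_LOCKS_WARN_ON(lock->magic != lock) */
        cfg->param = 1;
        mutex_unlock(&cfg->lock);
    }

With the lock moved outside the wiped structure, the reinitialisation can instead be done under mutex_lock(&priv->est_lock) around memset(priv->est, 0, sizeof(*priv->est)), as the stmmac_tc.c hunk above shows.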